
feat: change extraction - update the change-state rules

许家凯 · 3 years ago
commit 542b97b652

+ 156 - 165
src/main/scala/com/winhc/bigdata/spark/ng/change/NgChangeExtract.scala

@@ -1,6 +1,7 @@
 package com.winhc.bigdata.spark.ng.change
 
 import com.winhc.bigdata.spark.config.EsConfig
+import com.winhc.bigdata.spark.ng.change.NgChangeExtract.getDoubleDataMap
 import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
 import com.winhc.bigdata.spark.utils._
 import org.apache.spark.internal.Logging
@@ -14,181 +15,171 @@ import scala.collection.mutable
 /**
 * @Description: Filter out the concrete changes in the data
  */
-object NgChangeExtract {
 
-  // Split the grouped rows by change_flag: "0" = data from the current partition, "1" = the latest historical data; a missing side is null
-  def getDoubleDataMap(iterable: Iterable[Map[String, String]]): (Map[String, String], Map[String, String]) = {
-    val map = iterable.map(m => (m("change_flag"), m)).toMap
-    (map.getOrElse("0", null), map.getOrElse("1", null))
+case class NgChangeExtract(s: SparkSession,
+                           project: String, // project the table belongs to
+                           tableName1: String, // table name (without prefix or suffix)
+                           primaryKey: String, // primary key of this dimension
+                           inc_ds: String, // partition to compute
+                           primaryFields: Seq[String] // key fields; if any one of them differs, the row is considered changed
+                          ) extends LoggingUtils with Logging {
+  @(transient@getter) val spark: SparkSession = s
+
+  val target_tab = "bds_change_extract"
+
+  def init() {
+    sql(
+      s"""
+         |CREATE TABLE IF NOT EXISTS `$project`.`$target_tab` (
+         |  `rowkey` STRING COMMENT 'primary key of this row',
+         |  `company_id` STRING COMMENT 'company id',
+         |  `table_name` STRING COMMENT 'HBase table name',
+         |  `update_type` STRING COMMENT 'change type at the display layer: insert, update, deleted, other',
+         |  `old_data` MAP<STRING,STRING> COMMENT 'original data',
+         |  `new_data` MAP<STRING,STRING> COMMENT 'new data',
+         |  `change_fields` STRING COMMENT 'which fields changed',
+         |  `biz_date` STRING COMMENT 'time the data changed',
+         |  `update_time` STRING COMMENT 'time of this computation')
+         | COMMENT 'change events'
+         |PARTITIONED BY (
+         |  `ds` STRING COMMENT 'date partition',
+         |  `tn` STRING COMMENT 'table name partition')
+         |""".stripMargin)
   }
 
-  case class ChangeExtractHandle(s: SparkSession,
-                                 project: String, // project the table belongs to
-                                 tableName1: String, // table name (without prefix or suffix)
-                                 primaryKey: String, // primary key of this dimension
-                                 inc_ds: String, // partition to compute
-                                 primaryFields: Seq[String] // key fields; if any one of them differs, the row is considered changed
-                                ) extends LoggingUtils with Logging {
-    @(transient@getter) val spark: SparkSession = s
-
-    val target_tab = "bds_change_extract"
-
-    def init() {
-      sql(
-        s"""
-           |CREATE TABLE IF NOT EXISTS `$project`.`$target_tab` (
-           |  `rowkey` STRING COMMENT 'primary key of this row',
-           |  `company_id` STRING COMMENT 'company id',
-           |  `table_name` STRING COMMENT 'HBase table name',
-           |  `update_type` STRING COMMENT 'change type at the display layer: insert, update, deleted, other',
-           |  `old_data` MAP<STRING,STRING> COMMENT 'original data',
-           |  `new_data` MAP<STRING,STRING> COMMENT 'new data',
-           |  `change_fields` STRING COMMENT 'which fields changed',
-           |  `biz_date` STRING COMMENT 'time the data changed',
-           |  `update_time` STRING COMMENT 'time of this computation')
-           | COMMENT 'change events'
-           |PARTITIONED BY (
-           |  `ds` STRING COMMENT 'date partition',
-           |  `tn` STRING COMMENT 'table name partition')
-           |""".stripMargin)
-    }
 
-
-    val updateTimeMapping = Map(
-      "wenshu_detail_combine" -> "update_date", //文书排序时间
-      "company_equity_info_list" -> "reg_date" //文书排序时间
+  val updateTimeMapping = Map(
+    "wenshu_detail_combine" -> "update_date", //文书排序时间
+    "company_equity_info_list" -> "reg_date" //文书排序时间
+  )
+  // map alternative logical names to their underlying table
+  val tabMapping =
+    Map("company_holder_v2" -> "company_holder" // v2 holder data shares the company_holder table
     )
-    // map alternative logical names to their underlying table
-    val tabMapping =
-      Map("company_holder_v2" -> "company_holder" // v2 holder data shares the company_holder table
-      )
-
-    // map the logical table name to its physical table
-    def trans(s: String): String = {
-      var res = s
-      if (tabMapping.contains(s)) {
-        res = tabMapping(s)
-      }
-      res
-    }
 
-    def calc(): Unit = {
-      val tableName = trans(tableName1)
-      val cols = primaryFields.filter(!_.equals(primaryKey)).seq
-
-      val ds = inc_ds.replace("-", "")
-
-      val intersectCols = getColumns(s"$project.ads_$tableName").toSet & getColumns(s"$project.inc_ads_$tableName").toSet
-
-      val otherAllCols = intersectCols.filter(!primaryKey.equals(_)).toSeq
-      val all_cols = primaryKey +: otherAllCols :+ "change_flag"
-
-      val lastDs_ads_all = getLastPartitionsOrElse(s"$project.ads_$tableName", "0")
-
-      val handle = ReflectUtils.getClazz[NgCompanyChangeHandle](s"com.winhc.bigdata.spark.ng.change.table.$tableName1", cols)
-
-
-      val df = sql(
-        s"""
-           |SELECT  $primaryKey,${otherAllCols.mkString(",")},'0' as change_flag
-           |FROM    $project.inc_ads_$tableName
-           |WHERE   ds = $ds
-           |UNION ALL
-           |SELECT  t2.$primaryKey,${otherAllCols.map("t2." + _).mkString(",")},'1' as change_flag
-           |FROM    (
-           |            SELECT  DISTINCT ${primaryKey}
-           |            FROM    $project.inc_ads_$tableName
-           |            WHERE   ds = $ds
-           |        ) AS t1
-           |JOIN    (
-           |             SELECT  tmp.*
-           |             FROM    (
-           |                         SELECT  a.*
-           |                                 ,row_number() OVER (PARTITION BY a.${primaryKey} ORDER BY ds DESC, ${updateTimeMapping.getOrElse(tableName, "update_time")} DESC) c
-           |                         FROM    (
-           |                                     SELECT  ${intersectCols.mkString(",")},ds
-           |                                     FROM    $project.ads_$tableName
-           |                                     WHERE   ds = $lastDs_ads_all
-           |                                     UNION ALL
-           |                                     SELECT  ${intersectCols.mkString(",")},ds
-           |                                     FROM    $project.inc_ads_$tableName
-           |                                     WHERE   ds > $lastDs_ads_all and ds < $ds
-           |                                 ) AS a
-           |                     ) AS tmp
-           |             WHERE   tmp.c = 1
-           |        ) AS t2
-           |ON      t1.${primaryKey} = t2.${primaryKey}
-           |""".stripMargin)
-
-
-      val rdd =
-        df.select(all_cols.map(column => col(column).cast("string")): _*)
-          .rdd.map(r => {
-          (r.getAs[String](primaryKey), all_cols.map(f => (f, r.getAs[String](f))).toMap)
-        }).groupByKey()
-          .map(x => {
-            val rowkey = x._1
-            val map_list = x._2
-            val m = getDoubleDataMap(map_list)
-
-            val new_map = m._1
-            val old_map = m._2
-            val res = handle.handle(rowkey, old_map, new_map)
-            if (res == null) {
-              null
-            } else {
-              val rowkey = res._1
-              val company_id = res._2
-              val update_type = res._3
-              val old_map = res._4
-              val new_map = res._5
-              val change_fields = res._6
-              val biz_date = res._7
-              val update_time = BaseUtil.nowDate()
-
-              Row(rowkey, company_id, tableName, update_type, old_map, new_map, change_fields, biz_date, update_time)
-            }
-          }).filter(_ != null)
-
-      val schema = StructType(Array(
-        StructField("rowkey", StringType), //表数据主建
-        StructField("company_id", StringType), //公司id
-        StructField("table_name", StringType), //表名
-        StructField("update_type", StringType), // 变更类型 insert update
-        StructField("old_data", MapType(StringType, StringType)), //变更前数据
-        StructField("new_data", MapType(StringType, StringType)), //变更后数据
-        StructField("change_fields", StringType), //如果是更新 则显示更新字段
-        StructField("biz_date", StringType), //业务时间
-        StructField("update_time", StringType) //处理时间
-      ))
-
-      spark.createDataFrame(rdd, schema)
-        .createOrReplaceTempView(s"tmp_change_extract_view_$tableName1")
-
-      sql(
-        s"""
-           |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE ${project}.$target_tab PARTITION(ds='$ds',tn='$tableName1')
-           |SELECT *
-           |FROM
-           |    tmp_change_extract_view_$tableName1
-           |""".stripMargin)
+  // map the logical table name to its physical table
+  def trans(s: String): String = {
+    var res = s
+    if (tabMapping.contains(s)) {
+      res = tabMapping(s)
     }
+    res
   }
 
+  def calc(): Unit = {
+    val tableName = trans(tableName1)
+    val cols = primaryFields.filter(!_.equals(primaryKey)).seq
+
+    val ds = inc_ds.replace("-", "")
+
+    val intersectCols = getColumns(s"$project.ads_$tableName").toSet & getColumns(s"$project.inc_ads_$tableName").toSet
+
+    val otherAllCols = intersectCols.filter(!primaryKey.equals(_)).toSeq
+    val all_cols = primaryKey +: otherAllCols :+ "change_flag"
+
+    val lastDs_ads_all = getLastPartitionsOrElse(s"$project.ads_$tableName", "0")
+
+    val handle = ReflectUtils.getClazz[NgCompanyChangeHandle](s"com.winhc.bigdata.spark.ng.change.table.$tableName1", cols)
+
+
+    val df = sql(
+      s"""
+         |SELECT  $primaryKey,${otherAllCols.mkString(",")},'0' as change_flag
+         |FROM    $project.inc_ads_$tableName
+         |WHERE   ds = $ds
+         |UNION ALL
+         |SELECT  t2.$primaryKey,${otherAllCols.map("t2." + _).mkString(",")},'1' as change_flag
+         |FROM    (
+         |            SELECT  DISTINCT ${primaryKey}
+         |            FROM    $project.inc_ads_$tableName
+         |            WHERE   ds = $ds
+         |        ) AS t1
+         |JOIN    (
+         |             SELECT  tmp.*
+         |             FROM    (
+         |                         SELECT  a.*
+         |                                 ,row_number() OVER (PARTITION BY a.${primaryKey} ORDER BY ds DESC, ${updateTimeMapping.getOrElse(tableName, "update_time")} DESC) c
+         |                         FROM    (
+         |                                     SELECT  ${intersectCols.mkString(",")},ds
+         |                                     FROM    $project.ads_$tableName
+         |                                     WHERE   ds = $lastDs_ads_all
+         |                                     UNION ALL
+         |                                     SELECT  ${intersectCols.mkString(",")},ds
+         |                                     FROM    $project.inc_ads_$tableName
+         |                                     WHERE   ds > $lastDs_ads_all and ds < $ds
+         |                                 ) AS a
+         |                     ) AS tmp
+         |             WHERE   tmp.c = 1
+         |        ) AS t2
+         |ON      t1.${primaryKey} = t2.${primaryKey}
+         |""".stripMargin)
+
+
+    val rdd =
+      df.select(all_cols.map(column => col(column).cast("string")): _*)
+        .rdd.map(r => {
+        (r.getAs[String](primaryKey), all_cols.map(f => (f, r.getAs[String](f))).toMap)
+      }).groupByKey()
+        .map(x => {
+          val rowkey = x._1
+          val map_list = x._2
+          val m = getDoubleDataMap(map_list)
+
+          val new_map = m._1
+          val old_map = m._2
+          val res = handle.handle(rowkey, old_map, new_map)
+          if (res == null) {
+            null
+          } else {
+            val rowkey = res._1
+            val company_id = res._2
+            val update_type = res._3
+            val old_map = res._4
+            val new_map = res._5
+            val change_fields = res._6
+            val biz_date = res._7
+            val update_time = BaseUtil.nowDate()
+
+            Row(rowkey, company_id, tableName, update_type, old_map, new_map, change_fields, biz_date, update_time)
+          }
+        }).filter(_ != null)
+
+    val schema = StructType(Array(
+      StructField("rowkey", StringType), //表数据主建
+      StructField("company_id", StringType), //公司id
+      StructField("table_name", StringType), //表名
+      StructField("update_type", StringType), // 变更类型 insert update
+      StructField("old_data", MapType(StringType, StringType)), //变更前数据
+      StructField("new_data", MapType(StringType, StringType)), //变更后数据
+      StructField("change_fields", StringType), //如果是更新 则显示更新字段
+      StructField("biz_date", StringType), //业务时间
+      StructField("update_time", StringType) //处理时间
+    ))
+
+    spark.createDataFrame(rdd, schema)
+      .createOrReplaceTempView(s"tmp_change_extract_view_$tableName1")
+
+    sql(
+      s"""
+         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE ${project}.$target_tab PARTITION(ds='$ds',tn='$tableName1')
+         |SELECT *
+         |FROM
+         |    tmp_change_extract_view_$tableName1
+         |""".stripMargin)
+  }
+}
 
-  private val startArgs = Seq(
-    Args(tableName = "company_holder", primaryFields = "holder_id,percent,amount,deleted")
-    , Args(tableName = "company_staff", primaryFields = "staff_type,deleted")
-    , Args(tableName = "company", primaryKey = "company_id", primaryFields = "name,cate_third_code,county_code,reg_capital_amount,legal_entity_name,legal_entity_id,deleted")
-    , Args(tableName = "company_tm", primaryFields = "status")
-    , Args(tableName = "company_icp", primaryFields = "domain")
-  )
 
 
-  private case class Args(project: String = "winhc_ng"
-                          , tableName: String
-                          , primaryKey: String = "rowkey"
-                          , primaryFields: String)
+
+object NgChangeExtract {
+
+  // Split the grouped rows by change_flag: "0" = data from the current partition, "1" = the latest historical data; a missing side is null
+  def getDoubleDataMap(iterable: Iterable[Map[String, String]]): (Map[String, String], Map[String, String]) = {
+    val map = iterable.map(m => (m("change_flag"), m)).toMap
+    (map.getOrElse("0", null), map.getOrElse("1", null))
+  }
+
 
 
   def main(args: Array[String]): Unit = {
@@ -204,14 +195,14 @@ object NgChangeExtract {
     )
     val spark = SparkUtils.InitEnv("NgChangeExtract", config)
 
-    var start = startArgs
+    var start = NgChangeExtractArgs.startArgs
     if (!tableName.equals("all")) {
       val set = tableName.split(",").toSet
       start = start.filter(a => set.contains(a.tableName))
     }
 
     val a = start.map(e => (e.tableName, () => {
-      ChangeExtractHandle(spark, e.project, e.tableName, e.primaryKey, inc_ds, e.primaryFields.split(",")).calc()
+      NgChangeExtract(spark, e.project, e.tableName, e.primaryKey, inc_ds, e.primaryFields.split(",")).calc()
       true
     }))
 

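For reference: the query above pairs each rowkey's row from the current inc partition (change_flag = '0') with its latest historical row (change_flag = '1'), and getDoubleDataMap splits that grouped pair back out. A minimal sketch of the expected behaviour, with purely illustrative values:

// One grouped rowkey: change_flag "0" comes from the current partition,
// change_flag "1" from the newest historical row.
val rows: Iterable[Map[String, String]] = Seq(
  Map("change_flag" -> "0", "deleted" -> "0", "percent" -> "0.5"),
  Map("change_flag" -> "1", "deleted" -> "0", "percent" -> "0.4")
)
val (newData, oldData) = NgChangeExtract.getDoubleDataMap(rows)
// newData("percent") == "0.5", oldData("percent") == "0.4"
// If a rowkey only exists in the current partition, oldData is null (a brand-new record).
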
+ 21 - 0
src/main/scala/com/winhc/bigdata/spark/ng/change/NgChangeExtractArgs.scala

@@ -0,0 +1,21 @@
+package com.winhc.bigdata.spark.ng.change
+
+/**
+ * @author: XuJiakai
+ * @date: 2021/6/1 16:46
+ */
+case class NgChangeExtractArgs(project: String = "winhc_ng"
+                               , tableName: String
+                               , primaryKey: String = "rowkey"
+                               , primaryFields: String)
+
+
+object NgChangeExtractArgs {
+  val startArgs = Seq(
+    NgChangeExtractArgs(tableName = "company_holder", primaryFields = "holder_id,percent,amount,deleted")
+    , NgChangeExtractArgs(tableName = "company_staff", primaryFields = "staff_type,deleted")
+    , NgChangeExtractArgs(tableName = "company", primaryKey = "company_id", primaryFields = "name,cate_third_code,county_code,reg_capital_amount,legal_entity_name,legal_entity_id,deleted")
+    , NgChangeExtractArgs(tableName = "company_tm", primaryFields = "status")
+    , NgChangeExtractArgs(tableName = "company_icp", primaryFields = "domain")
+  )
+}
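Registering another table for extraction then only requires appending one more entry to startArgs, e.g. (hypothetical table and fields):

NgChangeExtractArgs(tableName = "company_example", primaryFields = "field_a,field_b,deleted")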

+ 14 - 15
src/main/scala/com/winhc/bigdata/spark/ng/change/NgCompanyChangeHandle.scala

@@ -38,6 +38,9 @@ trait NgCompanyChangeHandle extends Serializable with Logging {
         case "0" => {
           dynamic_type = NgCompanyUpdateType.Insert
         }
+        case "1" => {
+          dynamic_type = NgCompanyUpdateType.Deleted
+        }
         case _ => null
       }
     } else {
@@ -45,17 +48,19 @@ trait NgCompanyChangeHandle extends Serializable with Logging {
       val old_deleted = oldMap.getOrElse("deleted", "0")
       update_fields = getNotEquFields(oldMap, newMap)
       if (update_fields == null) {
-        // if no fields changed, filter the record out
+        // if no fields changed, filter the record out
         return null
       }
       s"$old_deleted$new_deleted" match {
-        case "00" => {
-          dynamic_type = NgCompanyUpdateType.Update
-        }
-        case "01" => {
-          dynamic_type = NgCompanyUpdateType.Deleted
-        }
-        case "10" | "90" | "09" | "19" => null
+        case "90" => dynamic_type = NgCompanyUpdateType.Insert
+
+        case "00" => dynamic_type = NgCompanyUpdateType.Update
+
+        case "01"|"91" => dynamic_type = NgCompanyUpdateType.Deleted
+
+        case "09" | "19" => dynamic_type = NgCompanyUpdateType.Remove
+
+        case "10" | "11" | "99" => null
 
         case _ => null
       }
@@ -104,11 +109,5 @@ trait NgCompanyChangeHandle extends Serializable with Logging {
     }
   }
 
-  protected def getValueOrNull(value: String, callBack: String): String = {
-    if (StringUtils.isNotBlank(value)) {
-      callBack
-    } else {
-      null
-    }
-  }
+  protected def getValueOrNull(value: String, callBack: String): String = if (StringUtils.isNotBlank(value)) callBack else null
 }
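
Condensed, the new state rules map the pair (old deleted flag, new deleted flag) to an update type as sketched below, where 0 = active, 1 = logically deleted and 9 = physically removed (see the new Remove value in the next file). For a record with no historical row, the new deleted flag alone decides: '0' yields insert, '1' yields deleted (per the first hunk above).

// Sketch of the (old_deleted, new_deleted) -> update_type rules introduced by this commit.
def updateType(oldDeleted: String, newDeleted: String): Option[NgCompanyUpdateType.Value] =
  s"$oldDeleted$newDeleted" match {
    case "90"        => Some(NgCompanyUpdateType.Insert)   // a removed row reappears as active
    case "00"        => Some(NgCompanyUpdateType.Update)
    case "01" | "91" => Some(NgCompanyUpdateType.Deleted)
    case "09" | "19" => Some(NgCompanyUpdateType.Remove)   // physical deletion
    case _           => None                               // "10", "11", "99", ... are filtered out
  }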

+ 1 - 0
src/main/scala/com/winhc/bigdata/spark/ng/change/NgCompanyUpdateType.scala

@@ -10,6 +10,7 @@ object NgCompanyUpdateType extends Enumeration {
   val Deleted = Value("deleted")
   val Insert = Value("insert")
   val Other = Value("other")
+  val Remove = Value("remove") // 物理删除,用于deleted 0->9等情况
 
  def checkExists(update_type: String) = this.values.exists(_.toString == update_type) // check whether this enum value exists
 

+ 54 - 0
src/main/scala/com/winhc/bigdata/spark/test/MyTest.scala

@@ -0,0 +1,54 @@
+package com.winhc.bigdata.spark.test
+
+import com.winhc.bigdata.spark.udf.{BaseFunc, CompanyMapping, CourtRank}
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils, TableInitUtil}
+import org.apache.spark.sql.SparkSession
+
+import scala.annotation.meta.getter
+import scala.collection.mutable
+
+/**
+ * @author: XuJiakai
+ * @date: 2020/12/17 14:51
+ */
+case class MyTest(s: SparkSession
+                 ) extends LoggingUtils with TableInitUtil with BaseFunc with CompanyMapping with CourtRank {
+  @(transient@getter) val spark: SparkSession = s
+
+  def te(index: String): Unit = {
+    import com.winhc.bigdata.spark.implicits.PhoenixHelper._
+
+    sql(
+      s"""
+         |SELECT  rowkey
+         |        ,new_cid as cid
+         |        ,hid
+         |        ,staff_type
+         |        ,create_time
+         |        ,update_time
+         |        ,deleted
+         |FROM    winhc_eci_dev.xjk_xjk_staff
+         |""".stripMargin)
+      .save2PhoenixByJDBCPro(s"XJK_TMP_OUT_PHOENIX_STAFF_DI")
+  }
+}
+
+object MyTest {
+  def main(args: Array[String]): Unit = {
+    val Array(index) = args
+    val config = mutable.Map(
+      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "10"
+    )
+    val spark = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
+
+    val start = BaseUtil.nowDate("yyyy-MM-dd HH:mm:ss")
+    MyTest(spark).te(index)
+    print(
+      s"""
+         |start as : ${start}
+         |end as : ${BaseUtil.nowDate("yyyy-MM-dd HH:mm:ss")}
+         |""".stripMargin)
+    spark.stop()
+  }
+}

+ 19 - 0
src/main/scala/com/winhc/bigdata/spark/test/TestCatalog.scala

@@ -1,10 +1,29 @@
 package com.winhc.bigdata.spark.test
 
+import com.winhc.bigdata.spark.utils.SparkUtils
+
+import scala.collection.mutable
+
 /**
  * @Author: XuJiakai
  * @Date: 2020/7/4 10:01
  * @Description:
  */
 object TestCatalog {
+  def main(args: Array[String]): Unit = {
+    val config = mutable.Map(
+      "spark.hadoop.odps.spark.local.partition.amt" -> "1000"
+    )
+    val spark  = SparkUtils.InitEnv("test",config)
+
+    import spark.implicits._
+
+    spark.catalog.listTables().map(t=>t.name).filter(t=>t.startsWith("tmp_replace_deleted_"))
+      .collect().seq.foreach(t=>{
+      println(s"DROP TABLE IF EXISTS $t;")
+    })
+
+    spark.stop()
+  }
 
 }

+ 1 - 1
src/main/scala/com/winhc/bigdata/spark/test/TestCompanyDynamic.scala

@@ -10,7 +10,7 @@ import org.apache.spark.sql.SparkSession
 import scala.collection.mutable
 
 /**
- * @Description: ${todo}
+ * @Description:
  * @author π
  * @date 2020/8/19 18:08
  */

+ 4 - 4
src/main/scala/com/winhc/bigdata/spark/test/TestFlow.scala

@@ -1,9 +1,5 @@
 package com.winhc.bigdata.spark.test
 
-import com.winhc.bigdata.spark.utils.SparkUtils
-
-import scala.collection.mutable
-
 /**
  * @Author: XuJiakai
  * @Date: 2020/6/22 14:32
@@ -11,6 +7,10 @@ import scala.collection.mutable
  */
 object TestFlow {
   def main(args: Array[String]): Unit = {
+    val str = "1.1123E7"
+    val fmt = new java.text.DecimalFormat("0.00")
+
+    println(fmt.format(str.toDouble))
 
 
 

+ 2 - 0
src/main/scala/com/winhc/bigdata/spark/test/TestSpark2Hbase.scala

@@ -27,6 +27,7 @@ object TestSpark2Hbase extends Logging {
     val spark = SparkUtils.InitEnv("TestSpark2Hbase", map)
     import spark._
 
+//    val outDir = getOutputPath(job)
     val jobConf = HBaseConfig.HBaseOutputJobConf(hbaseKVTable)
     val df1 = spark.createDataFrame(Seq(("1", "2", "3"))).toDF("col0", "col1", "col2")
     df1.rdd.map(row => {
@@ -39,6 +40,7 @@ object TestSpark2Hbase extends Logging {
         put.addColumn(f_bytes, current_cid_bytes, Bytes.toBytes(current_cid))
       (new ImmutableBytesWritable, put)
     }).saveAsHadoopDataset(jobConf)
+//      .saveAsNewAPIHadoopDataset(jobConf)
 
     logInfo("save hbase success!")
     spark.stop()

+ 14 - 0
src/main/scala/com/winhc/bigdata/spark/test/YamlTest.scala

@@ -1,11 +1,25 @@
 package com.winhc.bigdata.spark.test
 
+import org.slf4j.LoggerFactory
+
 /**
  * @Author: XuJiakai
  * @Date: 2020/6/29 13:51
  * @Description:
  */
 object YamlTest {
+  private val logger = LoggerFactory.getLogger(getClass);
   def main(args: Array[String]): Unit = {
+    logger.debug("abc")
+
   }
+
+/*  case class Pab(a: Seq[String])
+
+  def main(args: Array[String]): Unit = {
+    val foo = ChangeExtractUtils.getHandleClazz("company",Seq("a"))
+    val a = Map("a" -> "b")
+    val b = Map("a" -> "c")
+    println(foo.handle("a", a, b)) // prints "Hello there, Walter"
+  }*/
 }

+ 42 - 0
src/main/scala/com/winhc/bigdata/spark/test/addMd5Sql.scala

@@ -1,5 +1,12 @@
 package com.winhc.bigdata.spark.test
 
+import java.io.{File, PrintWriter}
+
+import com.winhc.bigdata.spark.test.ExportSql.{getCols, getSql}
+import com.winhc.bigdata.spark.utils.SparkUtils
+
+import scala.collection.mutable
+
 /**
  * @Author: XuJiakai
  * @Date: 2020/7/3 08:53
@@ -7,4 +14,39 @@ package com.winhc.bigdata.spark.test
  */
 object addMd5Sql {
 
+  def getAddSql(tableName: String, cols: Seq[String]): String = {
+    val cs = cols.filter(!"ds".equals(_)).seq
+    s"""
+      |ALTER TABLE $tableName ADD COLUMNS(md5 String COMMENT 'md5 of the deduplication primary key');
+      |INSERT OVERWRITE TABLE $tableName PARTITION(ds)
+      |SELECT ${cs.mkString(",")},md5(),ds
+      |FROM
+      |    $tableName
+      |WHERE ds>0;
+      |""".stripMargin
+    null
+  }
+
+  def main(args: Array[String]): Unit = {
+    val config = mutable.Map(
+      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "10"
+    )
+    val spark = SparkUtils.InitEnv("exportSql", config)
+
+    val catalog = spark.catalog
+    val tables = catalog.listTables.collect().map(_.name).filter(_.startsWith("inc_ods")).seq
+
+
+    val allSql = tables.map(t => {
+      getSql(t, getCols(catalog, t))
+    }).seq.mkString("")
+
+    val writer = new PrintWriter(new File("C:\\Users\\x\\Desktop\\公司\\all_sql.txt"))
+
+    writer.write(allSql)
+    writer.close()
+
+    spark.stop()
+  }
 }