@@ -0,0 +1,141 @@
+package com.winhc.bigdata.spark.jobs
+
+import com.winhc.bigdata.spark.config.EsConfig
+import com.winhc.bigdata.spark.udf.BaseFunc
+import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.SparkSession
+
+import scala.annotation.meta.getter
+import scala.collection.mutable
+
+/**
+ * @Author: XuJiakai
+ * @Date: 2020/7/30 10:34
+ * @Description: Maintains winhc_eci_dev.base_company_mapping, which maps every company
+ *               cid to its latest new_cid. init() bootstraps the table from the full
+ *               company data; inc(ds) and inc() apply daily incremental updates.
+ */
+object inc_company_mapping {
+
+  case class IncCompanyMappingUtil(s: SparkSession,
+                                   project: String
+                                  ) extends LoggingUtils with Logging with BaseFunc {
+    @(transient@getter) val spark: SparkSession = s
+
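+    // Creates the mapping table if it does not exist and seeds the initial
+    // partition from the 20200604 full snapshot of winhc_eci_dev.ads_company.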
+    def init(): Unit = {
+      sql(
+        s"""
+           |CREATE TABLE IF NOT EXISTS winhc_eci_dev.base_company_mapping
+           |(
+           |    cid STRING COMMENT 'cid'
+           |    ,cname STRING COMMENT 'cname'
+           |    ,new_cid STRING COMMENT 'new_cid'
+           |    ,update_time TIMESTAMP COMMENT 'update time'
+           |)
+           |COMMENT 'cid to latest new_cid mapping over the full company data, 20200730'
+           |PARTITIONED BY
+           |(
+           |    ds STRING COMMENT 'partition'
+           |)
+           |LIFECYCLE 15
+           |""".stripMargin)
+
+      sql(
+        s"""
+           |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE winhc_eci_dev.base_company_mapping PARTITION(ds='20200604')
+           |SELECT cid
+           |       ,name AS cname
+           |       ,COALESCE(current_cid,cid) AS new_cid
+           |       ,CAST('2020-06-04 00:00:00' AS TIMESTAMP) AS update_time
+           |FROM winhc_eci_dev.ads_company
+           |WHERE ds = '20200604'
+           |""".stripMargin)
+    }
+
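+    // Merges yesterday's mapping partition with today's inc_ods_company rows.
+    // Per cid the newest row wins (ROW_NUMBER ... ORDER BY update_time DESC); the
+    // LEFT JOIN on t2 then resolves one extra redirection hop, e.g. an old row
+    // 1 -> 2 plus today's 2 -> 3 yields 1 -> 3. BaseUtil.atDaysAfter(-1, ds) is
+    // assumed to return the yyyyMMdd string for the day before ds.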
+    def inc(ds: String): Unit = {
+      sql(
+        s"""
+           |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE winhc_eci_dev.base_company_mapping PARTITION(ds='$ds')
+           |SELECT t1.cid AS cid
+           |       ,t1.cname AS cname
+           |       ,COALESCE(t2.new_cid,t1.new_cid) AS new_cid
+           |       ,COALESCE(t2.update_time,t1.update_time) AS update_time
+           |FROM (
+           |         SELECT all_mapping.cid
+           |                ,all_mapping.cname
+           |                ,all_mapping.new_cid
+           |                ,all_mapping.update_time
+           |         FROM (
+           |                  SELECT *
+           |                         ,ROW_NUMBER() OVER(PARTITION BY cid ORDER BY update_time DESC) AS c
+           |                  FROM (
+           |                           SELECT cid,cname,new_cid,update_time
+           |                           FROM winhc_eci_dev.base_company_mapping
+           |                           WHERE ds = '${BaseUtil.atDaysAfter(-1, ds)}'
+           |                           UNION ALL
+           |                           SELECT cid
+           |                                  ,name AS cname
+           |                                  ,COALESCE(current_cid,cid) AS new_cid
+           |                                  ,now() AS update_time
+           |                           FROM winhc_eci_dev.inc_ods_company
+           |                           WHERE ds = '$ds'
+           |                           AND cid IS NOT NULL
+           |                       )
+           |              ) AS all_mapping
+           |         WHERE all_mapping.c = 1
+           |     ) AS t1
+           |LEFT JOIN (
+           |              SELECT cid
+           |                     ,current_cid AS new_cid
+           |                     ,now() AS update_time
+           |              FROM winhc_eci_dev.inc_ods_company
+           |              WHERE ds = '$ds'
+           |              AND cid IS NOT NULL
+           |              AND current_cid IS NOT NULL
+           |              GROUP BY cid,current_cid
+           |          ) AS t2
+           |ON t1.new_cid = t2.cid
+           |""".stripMargin)
+    }
+
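+    // Replays every inc_ods_company partition newer than the latest computed
+    // base_company_mapping partition (falling back to 20200604 when none exists).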
+    def inc(): Unit = {
+      val lastDs = getLastPartitionsOrElse("winhc_eci_dev.base_company_mapping", "20200604")
+      val dss = getPartitions("winhc_eci_dev.inc_ods_company").filter(_ > lastDs)
+
+      println("partitions to compute: " + dss.mkString(","))
+
+      for (ds <- dss) {
+        inc(ds)
+      }
+    }
+  }
+
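+  // Entry point: builds the Spark session against the winhc_eci_dev ODPS project
+  // and replays all pending incremental partitions; uncomment init() to bootstrap.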
+  def main(args: Array[String]): Unit = {
+    val project = "winhc_eci_dev"
+    val config = EsConfig.getEsConfigMap ++ mutable.Map(
+      "spark.hadoop.odps.project.name" -> project,
+      "spark.debug.maxToStringFields" -> "200",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "100"
+    )
+    val spark = SparkUtils.InitEnv("inc_company_mapping", config)
+
+    val incCompanyMappingUtil = IncCompanyMappingUtil(spark, project)
+    // incCompanyMappingUtil.init()
+    incCompanyMappingUtil.inc()
+    spark.stop()
+  }
+
+}