@@ -0,0 +1,90 @@
+package com.winhc.bigdata.spark.jobs.dynamic
+
+import com.winhc.bigdata.spark.config.EsConfig
+import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
+import com.winhc.bigdata.spark.utils.ReflectUtils.getClazz
+import com.winhc.bigdata.spark.utils.{LoggingUtils, SparkUtils}
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.types.StringType
+import org.apache.spark.sql.{Row, SparkSession}
+
+import scala.annotation.meta.getter
+import scala.collection.mutable
+
+/**
+ * @Author: XuJiakai
+ * @Date: 2020/7/27 16:52
+ * @Description: Company dynamics
+ */
+object CompanyDynamic {
+
+  case class CompanyDynamicUtil(s: SparkSession,
+                                project: String, // project the target table lives in
+                                tableName: String, // table name (without prefix or suffix)
+                                ds: String // partition to process
+                               ) extends LoggingUtils with Logging {
+    @(transient@getter) val spark: SparkSession = s
+
+    val targetTab = "target_tab"
+
+
+    def calc(): Unit = {
+      val handle = getClazz[ { // handler object resolved reflectively by table name; must expose org_type() and handle(...)
+        def org_type(): Seq[String]
+        def handle(rowkey: String, cid: String, change_fields: Seq[String], old_map: Map[String, String], new_map: Map[String, String]): (String, String, String, String, String, String, String, String)
+      }](s"com.winhc.bigdata.spark.jobs.dynamic.$tableName")
+
+      val types = handle.org_type()
+      val rdd = sql(
+        s"""
+           |SELECT *
+           |FROM winhc_eci_dev.ads_change_extract
+           |WHERE ds = '$ds'
+           |AND tn = '$tableName'
+           |AND TYPE in (${types.map("'" + _ + "'").mkString(",")})
+           |""".stripMargin)
+        .rdd.map(r => {
+          val rowkey = r.getAs[String]("rowkey")
+          val cid = r.getAs[String]("cid")
+          val new_data = r.getAs[Map[String, String]]("data")
+          val old_data = r.getAs[Map[String, String]]("old_data")
+          val biz_data = r.getAs[Map[String, String]]("biz_data")
+          val fields = r.getAs[String]("fields")
+          val res = handle.handle(rowkey, cid, fields.split(","), old_data, new_data)
+          Row(cid, res._1, res._2, res._3, res._4, res._5, res._6, res._7, res._8, new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new java.util.Date())) // fill create_time so the Row arity matches the 10-column schema below
+        })
+
+      val schema = getSchema(Map(
+        "cid" -> StringType
+        , "info_type" -> StringType
+        , "rta_desc" -> StringType
+        , "change_content" -> StringType
+        , "change_time" -> StringType
+        , "biz_id" -> StringType
+        , "sub_info_type" -> StringType
+        , "info_risk_level" -> StringType
+        , "winhc_suggest" -> StringType
+        , "create_time" -> StringType
+      ))
+      spark.createDataFrame(rdd, schema).write
+        .mode(if (isWindows) "append" else "overwrite")
+        .insertInto(targetTab)
+
+    }
+
+
+  }
+
+
+  def main(args: Array[String]): Unit = {
+    val Array(project, tableName) = args
+    val config = EsConfig.getEsConfigMap ++ mutable.Map(
+      "spark.hadoop.odps.project.name" -> project,
+      "spark.hadoop.odps.spark.local.partition.amt" -> "10"
+    )
+    val spark = SparkUtils.InitEnv("CompanyDynamic", config)
+    CompanyDynamicUtil(spark, project, tableName, "ds").calc() // use the parsed args; "ds" is still a literal placeholder partition
+    spark.stop()
+
+  }
+}
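
For reference, a minimal sketch of the per-table handler that calc() loads reflectively from com.winhc.bigdata.spark.jobs.dynamic.<tableName>. Only the two method signatures are dictated by the structural type passed to getClazz; the object name, the org_type values, and the field mapping below are illustrative assumptions, not part of this change.

// Hypothetical handler for a table named "company_example"; all concrete values are placeholders.
object company_example {

  // change types this handler consumes from ads_change_extract (assumed values)
  def org_type(): Seq[String] = Seq("insert", "update")

  // Returns the 8 dynamic fields consumed by CompanyDynamicUtil.calc(), in order:
  // (info_type, rta_desc, change_content, change_time, biz_id,
  //  sub_info_type, info_risk_level, winhc_suggest)
  def handle(rowkey: String, cid: String, change_fields: Seq[String],
             old_map: Map[String, String], new_map: Map[String, String])
  : (String, String, String, String, String, String, String, String) = {
    val changeContent = change_fields
      .map(f => s"$f:${old_map.getOrElse(f, "")} -> ${new_map.getOrElse(f, "")}")
      .mkString(";")
    ("company_example",                     // info_type
      "Company information changed",        // rta_desc
      changeContent,                        // change_content
      new_map.getOrElse("update_time", ""), // change_time
      rowkey,                               // biz_id
      "",                                   // sub_info_type
      "2",                                  // info_risk_level
      "")                                   // winhc_suggest
  }
}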