@@ -3,7 +3,7 @@ package com.winhc.bigdata.spark.jobs.chance
import com.winhc.bigdata.spark.config.EsConfig
import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
import com.winhc.bigdata.spark.utils.ChangeExtractUtils.getDoubleDataMap
-import com.winhc.bigdata.spark.utils.{ChangeExtractUtils, LoggingUtils, SparkUtils}
+import com.winhc.bigdata.spark.utils.{BaseUtil, ChangeExtractUtils, LoggingUtils, SparkUtils}
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.{MapType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}
@@ -23,8 +23,7 @@ object ChangeExtract {
tableName: String, // table name (without prefix or suffix)
primaryKey: String, // primary key of this dimension
inc_ds: String, // partition to compute
- primaryFields: Seq[String], // primary fields; if any one of them differs, the record is considered changed
- label: (Map[String, String], Map[String, String]) => String // dedup column
+ primaryFields: Seq[String] // primary fields; if any one of them differs, the record is considered changed
) extends LoggingUtils {
@(transient@getter) val spark: SparkSession = s
@@ -33,21 +32,23 @@ object ChangeExtract {
val ds = inc_ds.replace("-", "")

- val all_cols = primaryKey +: cols :+ "change_flag"
+ val intersectCols = getColumns(s"$project.ads_$tableName").toSet & getColumns(s"$project.inc_ads_$tableName").toSet

- val lastDs_ads_all = getLastPartitionsOrElse(s"$project.ads_$tableName", "0")
+ val otherAllCols = intersectCols.filter(!primaryKey.equals(_)).toSeq
+ val all_cols = primaryKey +: otherAllCols :+ "change_flag"

- val intersectCols = getColumns(s"$project.ads_$tableName").toSet & getColumns(s"$project.inc_ads_$tableName").toSet
+ val lastDs_ads_all = getLastPartitionsOrElse(s"$project.ads_$tableName", "0")

val handle = ChangeExtractUtils.getHandleClazz(tableName, cols)

+ val update_time = BaseUtil.nowDate()
val rdd = sql(
s"""
- |SELECT $primaryKey,${cols.mkString(",")},'0' as change_flag
+ |SELECT $primaryKey,${otherAllCols.mkString(",")},'0' as change_flag
|FROM $project.inc_ads_$tableName
|WHERE ds = $ds
|UNION ALL
- |SELECT t2.$primaryKey,${cols.map("t2." + _).mkString(",")},'1' as change_flag
+ |SELECT t2.$primaryKey,${otherAllCols.map("t2." + _).mkString(",")},'1' as change_flag
|FROM (
| SELECT DISTINCT ${primaryKey}
| FROM $project.inc_ads_$tableName
@@ -81,7 +82,7 @@ object ChangeExtract {
val map_list = x._2
if (map_list.size == 1) {
val res = handle.handle(rowkey, null, map_list.head)
- Row(res._1, tableName, res._2, res._3, res._4, res._5, res._6)
+ Row(res._1, tableName, res._2, res._3, res._4, res._5, res._6, res._7, update_time)
} else {
if (map_list.size > 2) {
logger.error("list.size greater than 2! rowkey:" + rowkey)
@@ -91,7 +92,7 @@ object ChangeExtract {
val new_map = m._1
val old_map = m._2
val res = handle.handle(rowkey, old_map, new_map)
- Row(res._1, tableName, res._2, res._3, res._4, res._5, res._6)
+ Row(res._1, tableName, res._2, res._3, res._4, res._5, res._6, res._7, update_time)
}
}).filter(_ != null)
@@ -103,7 +104,9 @@ object ChangeExtract {
StructField("data", MapType(StringType, StringType)),
StructField("fields", StringType),
StructField("title", StringType),
- StructField("label", StringType)
+ StructField("label", StringType),
+ StructField("biz_time", StringType),
+ StructField("update_time", StringType)
))

val df = spark.createDataFrame(rdd, schema) //
@@ -119,10 +122,6 @@ object ChangeExtract {
def main(args: Array[String]): Unit = {
val Array(project, tableName, rowkey, inc_ds, pf) = args

- def label(m1: Map[String, String], m2: Map[String, String]): String = {
- ""
- }
-
val config = EsConfig.getEsConfigMap ++ mutable.Map(
"spark.hadoop.odps.project.name" -> project,
"spark.hadoop.odps.spark.local.partition.amt" -> "10"
@@ -130,7 +129,7 @@ object ChangeExtract {
val spark = SparkUtils.InitEnv("ChangeExtract", config)

- ChangeExtractHandle(spark, project, tableName, rowkey, inc_ds, pf.split(","), label).calc
+ ChangeExtractHandle(spark, project, tableName, rowkey, inc_ds, pf.split(",")).calc
spark.stop()
}
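For readers skimming the patch, the reworked column handling in the `@@ -33,21 +32,23 @@` hunk can be summarized outside the diff. A minimal, self-contained sketch with made-up column names (the real ones come from `getColumns` on the `ads_`/`inc_ads_` tables, not from this patch) shows what the intersection-based selection yields:

```scala
object ColumnIntersectionSketch extends App {
  // Hypothetical column sets for $project.ads_<table> and $project.inc_ads_<table>.
  val adsCols    = Set("rowkey", "name", "amount", "ds")
  val incAdsCols = Set("rowkey", "name", "amount", "biz_date", "ds")

  // Same shape as the patch: keep only columns both tables share,
  // drop the primary key from the "other" columns, then rebuild the full list
  // with the primary key first and the synthetic change_flag last.
  val primaryKey    = "rowkey"
  val intersectCols = adsCols & incAdsCols
  val otherAllCols  = intersectCols.filter(!primaryKey.equals(_)).toSeq
  val all_cols      = primaryKey +: otherAllCols :+ "change_flag"

  println(all_cols.mkString(","))  // e.g. rowkey,name,amount,ds,change_flag
}
```

Selecting only the shared columns keeps the `UNION ALL` in the patched SQL valid even when one of the two tables has extra columns.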
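And a hedged sketch of how the slimmed-down entrypoint is driven after this change. It mirrors the patched `main()` with two assumptions: the argument values are placeholders, and `ChangeExtractHandle` is taken to be the case class nested in `object ChangeExtract` under `com.winhc.bigdata.spark.jobs.chance`, as the hunk headers suggest:

```scala
import scala.collection.mutable

import com.winhc.bigdata.spark.config.EsConfig
import com.winhc.bigdata.spark.jobs.chance.ChangeExtract.ChangeExtractHandle // assumed nesting
import com.winhc.bigdata.spark.utils.SparkUtils

object ChangeExtractRunner {
  def main(args: Array[String]): Unit = {
    // Placeholder arguments; the real job parses them from the command line.
    val project   = "winhc_test_project"  // ODPS project (placeholder)
    val tableName = "company_holder"      // table name without ads_/inc_ads_ prefix (placeholder)
    val rowkey    = "rowkey"              // primary key of the dimension
    val inc_ds    = "2020-06-30"          // partition to compute (placeholder)
    val pf        = "holder_name,percent" // primaryFields, comma separated (placeholder)

    val config = EsConfig.getEsConfigMap ++ mutable.Map(
      "spark.hadoop.odps.project.name" -> project,
      "spark.hadoop.odps.spark.local.partition.amt" -> "10"
    )
    val spark = SparkUtils.InitEnv("ChangeExtract", config)

    // No label callback any more: the case class now takes only the five fields above.
    ChangeExtractHandle(spark, project, tableName, rowkey, inc_ds, pf.split(",")).calc
    spark.stop()
  }
}
```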