|
@@ -0,0 +1,165 @@
|
|
|
+package com.winhc.bigdata.spark.ng.dynamic
|
|
|
+
|
|
|
+import com.winhc.bigdata.spark.config.EsConfig
|
|
|
+import com.winhc.bigdata.spark.implicits.CaseClass2JsonHelper._
|
|
|
+import com.winhc.bigdata.spark.ng.dynamic.utils.DynamicAssociationEntity
|
|
|
+import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
|
|
|
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, ReflectUtils, SparkUtils}
|
|
|
+import org.apache.commons.lang3.StringUtils
|
|
|
+import org.apache.spark.internal.Logging
|
|
|
+import org.apache.spark.sql.{Row, SparkSession}
|
|
|
+
|
|
|
+import scala.annotation.meta.getter
|
|
|
+import scala.collection.mutable
|
|
|
+
|
|
|
/**
 * Builds daily "dynamic update" JSON records from change extracts
 * (winhc_ng.bds_change_extract) and writes them to winhc_ng.out_dynamic_update.
 *
 * @author XuJiakai
 * @since 2021/8/7 15:02
 */
|
|
|
+
|
|
|
case class DynamicUpdateId(s: SparkSession
                          ) extends LoggingUtils with Logging {
  // @transient keeps the session out of serialized task closures; the @getter
  // meta-annotation moves @transient onto the generated getter.
  @(transient@getter) val spark: SparkSession = s

  // NOTE(review): org_tab is never referenced in this class — confirm it is dead code.
  private val org_tab = "winhc_ng.out_company_dynamic"
  // Output table: a single JSON `content` column, partitioned by ds (created in init()).
  private val target_tab = "winhc_ng.out_dynamic_update"


  // Handler registry discovered by reflection: every DynamicAssociationEntity
  // sub-object under the ...dynamic.handle package, keyed by its simple class name.
  // Keys are expected to match the `tn` values read in calc(); map(tn) throws otherwise.
  private val map: Map[String, DynamicAssociationEntity] = ReflectUtils.subObject[DynamicAssociationEntity](classOf[DynamicAssociationEntity], "com.winhc.bigdata.spark.ng.dynamic.handle", true).map(r => (r.getClass.getSimpleName, r)).toMap

  // Ensure the output table exists as soon as the job object is constructed.
  init()

  /** Idempotently creates the output table if it does not exist yet. */
  def init(): Unit = {
    sql(
      s"""
         |CREATE TABLE IF NOT EXISTS $target_tab
         |(
         |    content STRING COMMENT 'json数据'
         |)
         |COMMENT '企业动态添加Id表'
         |PARTITIONED BY (ds STRING COMMENT '分区')
         |LIFECYCLE 30
         |""".stripMargin)
  }


  /**
   * Computes one day's worth of dynamic-update messages and writes them into target_tab.
   *
   * Three record families are produced, distinguished by the `flag` field in the JSON:
   *  - flag '9': plain removals for every tn except company / company_staff / company_holder
   *  - flag '8': company_staff / company_holder removals, carrying company_id and the
   *    person's relation id (hid / holder_id)
   *  - flag '0': for the whitelisted tns below, 'update' rows where an associated entity
   *    that had no keyno in old_data now carries one in new_data — i.e. an id was attached
   *
   * @param ds partition of winhc_ng.bds_change_extract to read
   */
  def calc(ds: String): Unit = {
    // Dimension tables whose 'update' rows are scanned for newly attached ids (flag '0').
    val tns = Seq("company_court_announcement"
      , "company_court_open_announcement"
      , "company_court_register"
      , "company_dishonest_info"
      , "company_equity_info"
      , "company_equity_pledge_holder"
      , "company_judicial_assistance"
      , "company_land_transfer"
      , "company_lawsuit"
      , "company_send_announcement"
      , "company_zxr"
      , "company_zxr_final_case"
      , "company_zxr_restrict"
      , "restrictions_on_exit"
      , "zxr_evaluate_results")

    // Removal records (flags '9' and '8'), built purely in SQL.
    sql(
      s"""
         |select to_json(MAP('rowkey',rowkey,"tn",tn,'flag','9')) content
         |from winhc_ng.bds_change_extract
         |where ds = '$ds'
         |and update_type = 'remove'
         |and tn <> 'company'
         |and tn <> 'company_staff'
         |and tn <> 'company_holder'
         |UNION ALL
         |select to_json(MAP('rowkey',rowkey,"tn",tn,'flag','8','company_id',new_data['company_id'],'relation_id',new_data['hid'])) content
         |from winhc_ng.bds_change_extract
         |where ds = '$ds'
         |and update_type = 'remove'
         |and tn = 'company_staff'
         |and new_data['hid'] is not null
         |UNION ALL
         |select to_json(MAP('rowkey',rowkey,"tn",tn,'flag','8','company_id',new_data['company_id'],'relation_id',new_data['holder_id'])) content
         |from winhc_ng.bds_change_extract
         |where ds = '$ds'
         |and update_type = 'remove'
         |and tn = 'company_holder'
         |and new_data['holder_id'] is not null
         |""".stripMargin)
      .createTempView("company_dynamic_out_tab_del")


    // flag '0' records need the per-tn handlers, so they are computed on the RDD.
    val rdd = sql(
      s"""
         |select rowkey,
         |       company_id,
         |       table_name,
         |       update_type,
         |       old_data,
         |       new_data,
         |       change_fields,
         |       biz_date,
         |       update_time,
         |       tn
         |from winhc_ng.bds_change_extract
         |where ds = '$ds'
         |and update_type = 'update'
         |and ${tns.map(r => s" tn = '$r' ").mkString("(", "or", ")")}
         |""".stripMargin)
      .rdd.map(r => {
      // NOTE(review): several of the fields extracted below are never used — confirm
      // before pruning (company_id, table_name, update_type, change_fields, biz_date,
      // update_time).
      val rowkey = r.getAs[String]("rowkey")
      val company_id = r.getAs[String]("company_id")
      val table_name = r.getAs[String]("table_name")
      val update_type = r.getAs[String]("update_type")
      val old_data = r.getAs[Map[String, String]]("old_data")
      val new_data = r.getAs[Map[String, String]]("new_data")
      val change_fields = r.getAs[String]("change_fields")
      val biz_date = r.getAs[String]("biz_date")
      val update_time = r.getAs[String]("update_time")
      val tn = r.getAs[String]("tn")
      // Unguarded lookup: a tn missing from the reflective registry fails the task.
      val handle = map(tn)
      val old_entity = handle.getAssociationEntityInfo(old_data)
      val new_entity = handle.getAssociationEntityInfo(new_data)
      // Names of entities that had NO keyno before the update...
      val old_name = old_entity.filter(e => StringUtils.isEmpty(e.keyno)).map(_.name).toSet
      // ...that now DO carry a keyno: these entities just had an id attached.
      val entity: Seq[AssociationEntityInfo] = new_entity.filter(e => old_name.contains(e.name)).filter(e => StringUtils.isNotEmpty(e.keyno))
      if (entity.isEmpty) {
        null // dropped by the filter below
      } else {
        Row(Map(
          "flag" -> "0"
          , "tn" -> tn
          , "rowkey" -> rowkey
          , "entity" -> entity).toJson()
          // NOTE(review): presumably fills the partition (ds) slot of
          // spark.table(target_tab).schema — confirm the schema includes ds.
          , null
        )
      }
    }).filter(_ != null)

    spark.createDataFrame(rdd, spark.table(target_tab).schema)
      .createTempView("company_dynamic_out_tab")
    // NOTE(review): data is read from partition `ds` but written to getYesterday() —
    // confirm this read/write partition asymmetry is intentional.
    sql(
      s"""
         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $target_tab PARTITION(ds='${BaseUtil.getYesterday()}')
         |SELECT ${getColumns(target_tab).diff(Seq("ds")).mkString(",")}
         |FROM company_dynamic_out_tab
         |UNION ALL
         |select ${getColumns(target_tab).diff(Seq("ds")).mkString(",")}
         |from company_dynamic_out_tab_del
         |""".stripMargin)


  }
}
|
|
|
+
|
|
|
object DynamicUpdateId {

  /**
   * CLI entry point.
   *
   * Expects exactly one program argument — the `ds` partition of
   * winhc_ng.bds_change_extract to process. Builds the Spark session,
   * runs the job, then shuts the session down.
   */
  def main(args: Array[String]): Unit = {
    val Array(ds) = args
    // ES connectivity settings first, then job-specific ODPS/Spark overrides.
    val baseConfig = EsConfig.getEsConfigMap
    val overrides = mutable.Map(
      "spark.hadoop.odps.project.name" -> "winhc_ng",
      "spark.debug.maxToStringFields" -> "200",
      "spark.hadoop.odps.spark.local.partition.amt" -> "100"
    )
    val session = SparkUtils.InitEnv(this.getClass.getSimpleName, baseConfig ++ overrides)
    DynamicUpdateId(session).calc(ds)
    session.stop()
  }
}
|