
Merge remote-tracking branch 'origin/master'

xufei 4 years ago
parent
commit
cf0dc616b5

+ 24 - 0
src/main/scala/com/winhc/bigdata/spark/const/CaseChanceConst.scala

@@ -0,0 +1,24 @@
+package com.winhc.bigdata.spark.const
+
+/**
+ * @Author: XuJiakai
+ * @Date: 2020/7/16 10:48
+ * @Description:
+ */
+object CaseChanceConst {
+  val CHANCE_DYNAMIC_TYPE = Map(
+    /*"" -> "3-1" //企业增资
+    , "" -> "3-2" //企业新增对外投资
+    , "" -> "3-3" //新增招投标
+    , "" -> "3-4" //新增招聘
+    , "" -> "3-5" //地块公示
+    , "" -> "3-6" //购地信息
+    , "" -> "3-7" //土地转让
+
+    ,*/ "company_tm" -> "3-8" //知识产权-商标
+    , "company_patent_list" -> "3-9" //专利
+    , "company_certificate" -> "3-10" //资质证书
+    , "company_copyright_works_list" -> "3-11" //作品著作权
+    , "company_copyright_reg_list" -> "3-12" //软件著作权
+  )
+}
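Reviewer note: CHANCE_DYNAMIC_TYPE is a plain table-name → dynamic-type-code lookup, consumed later in this commit by the get_chance_dynamic_type UDF. A minimal usage sketch; the unknown-key lookup below is illustrative only:

    import com.winhc.bigdata.spark.const.CaseChanceConst

    object CaseChanceConstSketch {
      def main(args: Array[String]): Unit = {
        // known key: dynamic type code for trademark changes
        println(CaseChanceConst.CHANCE_DYNAMIC_TYPE("company_tm")) // 3-8

        // unknown keys throw NoSuchElementException, so use .get for optional lookups
        println(CaseChanceConst.CHANCE_DYNAMIC_TYPE.get("company_some_other_table")) // None
      }
    }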

+ 8 - 1
src/main/scala/com/winhc/bigdata/spark/implicits/CompanyIndexSave2EsHelper.scala

@@ -85,7 +85,14 @@ object CompanyIndexSave2EsHelper {
     val city_code = c._2
     val county_code = c._3
     val et = map("estiblish_time")
-    val time = if (StringUtils.isNotBlank(et)) et else null
+    var time = if (StringUtils.isNotBlank(et)) {
+      if (et.contains(" ")) {
+        et.split(" ")(0)
+      } else {
+        et
+      }
+    } else null
+
 
     val doc = CompanyDoc(
       cname = getCompanyName(map("name"))
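Reviewer note: the new branch keeps only the date portion when estiblish_time arrives as a full timestamp. A standalone sketch of the same logic with assumed sample values (the helper name is ours, not the project's):

    object EstablishTimeSketch {
      // keep only the "yyyy-MM-dd" part when the value carries a time component
      def normalizeEstablishTime(et: String): String =
        if (et == null || et.trim.isEmpty) null
        else if (et.contains(" ")) et.split(" ")(0)
        else et

      def main(args: Array[String]): Unit = {
        println(normalizeEstablishTime("2020-07-16 10:48:00")) // 2020-07-16
        println(normalizeEstablishTime("2020-07-16"))          // 2020-07-16
        println(normalizeEstablishTime(""))                    // null
      }
    }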

+ 4 - 13
src/main/scala/com/winhc/bigdata/spark/jobs/CompanyIncCompany2Es.scala

@@ -136,20 +136,11 @@ object CompanyIncCompany2Es {
         }
         (new ImmutableBytesWritable, put)
       }).filter(_ != null)
-//        .saveAsNewAPIHadoopDataset(jobConf)
-              .saveAsHadoopDataset(jobConf)
+        .saveAsHadoopDataset(jobConf)
 
       // write out to ES
-      import com.winhc.bigdata.spark.utils.CompanyEsUtils.getEsDoc
-      import org.elasticsearch.spark._
-      stringDf.map(r => {
-        val cid = r.getAs[String]("cid")
-        val cname = r.getAs[String]("name")
-        val history_names = r.getAs[String]("history_names")
-        val current_cid = r.getAs[String]("current_cid")
-        val company_type = r.getAs[String]("company_type")
-        getEsDoc(cid, cname, history_names, current_cid, company_type)
-      }).rdd.saveToEsWithMeta("winhc-company/company")
+      import com.winhc.bigdata.spark.implicits.CompanyIndexSave2EsHelper._
+      stringDf.companyIndexSave2Es()
 
     }
   }
@@ -164,7 +155,7 @@ object CompanyIncCompany2Es {
 
     val config = EsConfig.getEsConfigMap ++ mutable.Map(
       "spark.hadoop.odps.project.name" -> project,
-      "spark.hadoop.odps.spark.local.partition.amt" -> "2"
+      "spark.hadoop.odps.spark.local.partition.amt" -> "10"
     )
 
     val spark = SparkUtils.InitEnv("company2Es", config)
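Reviewer note: the inline ES document mapping is replaced by a single companyIndexSave2Es() call, which works because the import brings an implicit DataFrame wrapper from CompanyIndexSave2EsHelper into scope. That helper's body is not part of this diff, so the following is only a sketch of the enrichment pattern, not the real implementation:

    import org.apache.spark.sql.DataFrame

    object CompanyIndexSave2EsHelperSketch {
      implicit class EsSaveEnricher(df: DataFrame) {
        def companyIndexSave2Es(): Unit = {
          // build company documents from each row and write them to the ES index;
          // the real mapping is elided here, this only marks the extension point
          df.rdd.foreachPartition(_ => ())
        }
      }
    }

    // usage, mirroring the new code above:
    //   import CompanyIndexSave2EsHelperSketch._
    //   stringDf.companyIndexSave2Es()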

+ 53 - 10
src/main/scala/com/winhc/bigdata/spark/jobs/chance/ChangeExtract.scala

@@ -2,8 +2,8 @@ package com.winhc.bigdata.spark.jobs.chance
 
 import com.winhc.bigdata.spark.config.EsConfig
 import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
-import com.winhc.bigdata.spark.utils.ChangeExtractUtils.getDoubleDataMap
-import com.winhc.bigdata.spark.utils.{BaseUtil, ChangeExtractUtils, LoggingUtils, SparkUtils}
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
+import org.apache.spark.internal.Logging
 import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.types.{MapType, StringType, StructField, StructType}
 import org.apache.spark.sql.{Row, SparkSession}
@@ -18,15 +18,35 @@ import scala.collection.mutable
  */
 object ChangeExtract {
 
+  // determines whether two maps are equal on the given keys; if not, the differing fields are returned
+  def getDoubleDataMap(iterable: Iterable[Map[String, String]]): (Map[String, String], Map[String, String]) = {
+    val map = iterable.map(m => (m("change_flag"), m)).toMap
+    (map("0"), map("1"))
+  }
+
+  def getHandleClazz(tableName: String, equCols: Seq[String]): {def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): (String, String, String, Map[String, String], String, String, String, String)} = {
+    val clazz = s"com.winhc.bigdata.spark.jobs.chance.$tableName"
+    val foo = Class.forName(clazz)
+      .getConstructors.head.newInstance(equCols)
+      .asInstanceOf[ {
+      def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): (String, String, String, Map[String, String], String, String, String, String)
+    }]
+    foo
+  }
+
+
   case class ChangeExtractHandle(s: SparkSession,
                                  project: String, // project that owns the table
                                  tableName: String, // table name (without prefix or suffix)
                                  primaryKey: String, // primary key of this dimension
                                  inc_ds: String, // partition to compute
                                  primaryFields: Seq[String] // primary fields; a difference in any one of them counts as a change
-                                ) extends LoggingUtils {
+                                ) extends LoggingUtils with Logging {
     @(transient@getter) val spark: SparkSession = s
 
+
+    val target_eci_change_extract = "ads_change_extract"
+
     def calc(): Unit = {
       val cols = primaryFields.filter(!_.equals(primaryKey)).seq
 
@@ -39,7 +59,7 @@ object ChangeExtract {
 
       val lastDs_ads_all = getLastPartitionsOrElse(s"$project.ads_$tableName", "0")
 
-      val handle = ChangeExtractUtils.getHandleClazz(tableName, cols)
+      val handle = getHandleClazz(tableName, cols)
 
       val update_time = BaseUtil.nowDate()
       val rdd = sql(
@@ -80,9 +100,10 @@ object ChangeExtract {
         .map(x => {
           val rowkey = x._1
           val map_list = x._2
+          //          try {
           if (map_list.size == 1) {
             val res = handle.handle(rowkey, null, map_list.head)
-            Row(res._1, tableName, res._2, res._3, res._4, res._5, res._6, res._7, update_time)
+            Row(res._1, res._2, tableName, res._3, res._4, res._5, res._6, res._7, res._8, update_time)
           } else {
             if (map_list.size > 2) {
               logger.error("list.size greater than 2! rowkey:" + rowkey)
@@ -92,13 +113,26 @@ object ChangeExtract {
             val new_map = m._1
             val old_map = m._2
             val res = handle.handle(rowkey, old_map, new_map)
-            Row(res._1, tableName, res._2, res._3, res._4, res._5, res._6, res._7, update_time)
+            if (res == null) {
+              null
+            } else {
+              Row(res._1, res._2, tableName, res._3, res._4, res._5, res._6, res._7, res._8, update_time)
+            }
           }
+          /* } catch {
+             case e: Exception => {
+               logError(s"xjk rowkey:$rowkey msg:${e.getMessage} equCols:$cols")
+               logError(e.getMessage, e)
+               println(s"xjk rowkey:$rowkey msg:${e.getMessage} equCols:$cols")
+             }
+               null
+           }*/
         }).filter(_ != null)
 
       // (123_abc, insert, {a->b}, all, "new land publication", 1 (1 = general change, 2 = risk change))
       val schema = StructType(Array(
         StructField("rowkey", StringType), // primary key of the source row
+        StructField("cid", StringType), // company id
         StructField("table_name", StringType), // table name
         StructField("type", StringType), // change type: insert or update
         StructField("data", MapType(StringType, StringType)), // data after the change
@@ -109,15 +143,24 @@ object ChangeExtract {
         StructField("update_time", StringType) //处理时间
         StructField("update_time", StringType) //处理时间
       ))
       ))
 
 
-      val df = spark.createDataFrame(rdd, schema) //
+      spark.createDataFrame(rdd, schema)
+        .createOrReplaceTempView("tmp_change_extract_view") //
 
-      df.write
-        .mode(if (isWindows) "append" else "overwrite")
-        .insertInto(s"${project}.tmp_xjk_icp_change_v2")
+      sql(
+        s"""
+           |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE ${project}.$target_eci_change_extract PARTITION(ds='$ds',tn='$tableName')
+           |SELECT *
+           |FROM
+           |    tmp_change_extract_view
+           |""".stripMargin)
     }
   }
 
 
+  // winhc_eci_dev company_tm rowkey 20200707 rowkey,status_new
+  // winhc_eci_dev company_patent_list rowkey 20200707 rowkey,status_new
+
+
   // winhc_eci_dev company cid 20200630 legal_entity_id,reg_location,business_scope,reg_status,reg_capital,emails,phones
   def main(args: Array[String]): Unit = {
     val Array(project, tableName, rowkey, inc_ds, pf) = args
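Reviewer note: getHandleClazz (now local to ChangeExtract) resolves a handler by table name via reflection and casts it to a structural type, so supporting a new table only requires a matching case class in com.winhc.bigdata.spark.jobs.chance. A self-contained sketch of that dispatch technique; the demo package, class and simplified return type are stand-ins, not the project's:

    package demo

    // handlers share one constructor shape: (equCols: Seq[String])
    case class company_tm(equCols: Seq[String]) {
      def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): String =
        s"$rowkey handled with ${equCols.size} compare columns"
    }

    object ReflectiveDispatchSketch {
      // structural type: "anything with a matching handle method"
      type Handler = {
        def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): String
      }

      def getHandleClazz(tableName: String, equCols: Seq[String]): Handler =
        Class.forName(s"demo.$tableName")
          .getConstructors.head
          .newInstance(equCols)
          .asInstanceOf[Handler]

      def main(args: Array[String]): Unit = {
        import scala.language.reflectiveCalls // structural calls go through reflection
        val handle = getHandleClazz("company_tm", Seq("status_new"))
        println(handle.handle("123_abc", null, Map("status_new" -> "1")))
      }
    }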

+ 77 - 32
src/main/scala/com/winhc/bigdata/spark/jobs/chance/CompanyChangeHandle.scala

@@ -1,7 +1,9 @@
 package com.winhc.bigdata.spark.jobs.chance
 
-import com.winhc.bigdata.spark.utils.BaseUtil
 import com.winhc.bigdata.spark.utils.BaseUtil.cleanup
+import com.winhc.bigdata.spark.utils.ChangeExtractUtils
+import org.apache.commons.lang3.StringUtils
+import org.apache.spark.internal.Logging
 
 import scala.annotation.meta.{getter, setter}
 
@@ -11,7 +13,7 @@ import scala.annotation.meta.{getter, setter}
  * @Description:
  */
 
-trait CompanyChangeHandle extends Serializable {
+trait CompanyChangeHandle extends Serializable with Logging {
   @getter
   @setter
   protected val equCols: Seq[String]
@@ -21,9 +23,33 @@ trait CompanyChangeHandle extends Serializable {
    * @param rowkey
    * @param oldMap
    * @param newMap
-   * @return rowkey, type [insert or update], new data, changed fields, change title, change label [1 = general change, 2 = risk change ...], business time
+   * @return rowkey, cid, type [insert or update], new data, changed fields, change title, change label [1 = general change, 2 = risk change ...], business time
    */
-  def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): (String, String, Map[String, String], String, String, String, String)
+  def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): (String, String, String, Map[String, String], String, String, String, String) = {
+    if (oldMap == null) {
+      (rowkey, getCid(rowkey, newMap), "insert", newMap, null, getInsertTitle(newMap), getLabel(oldMap, newMap), getBizTime(newMap))
+    } else {
+      val t = getEquAndFields(oldMap, newMap)
+      if (t._1) {
+        null
+      } else {
+        (rowkey, getCid(rowkey, newMap), "update", newMap,
+          t._2
+          , getUpdateTitle(newMap), getLabel(oldMap, newMap), getBizTime(newMap))
+      }
+    }
+  }
+
+
+  def getCid(rowkey: String, newMap: Map[String, String]): String = rowkey.split("_")(0)
+
+  def getUpdateTitle(newMap: Map[String, String]): String
+
+  def getInsertTitle(newMap: Map[String, String]): String
+
+  def getLabel(oldMap: Map[String, String], newMap: Map[String, String]): String
+
+  def getBizTime(newMap: Map[String, String]): String
 
   def getEquAndFields(oldMap: Map[String, String], newMap: Map[String, String]): (Boolean, String) = {
     val tmp = equCols.map(f => {
@@ -36,40 +62,59 @@ trait CompanyChangeHandle extends Serializable {
       (eq, tmp.filter(!_._2).map(_._1).mkString(","))
     }
   }
-}
 
-// land parcel publication
-case class company_land_publicity(equCols: Seq[String]) extends CompanyChangeHandle with Serializable {
-  override def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): (String, String, Map[String, String], String, String, String, String) = {
-    if (oldMap == null) {
-      (rowkey, "insert", newMap, "", s"新增某地块公示", "1", "业务时间")
+
+  protected def getValueOrNull(value: String, callBack: String): String = {
+    if (StringUtils.isNotBlank(value)) {
+      callBack
     } else {
-      val t = getEquAndFields(oldMap, newMap)
-      if (t._1) {
-        null
-      } else {
-        (rowkey, "update", newMap,
-          t._2
-          , s"更新某地块公示", "1", "业务时间")
-      }
+      null
     }
   }
 }
 
+// land parcel publication
+case class company_land_publicity(equCols: Seq[String]) extends CompanyChangeHandle with Serializable {
+
+  override def getLabel(oldMap: Map[String, String], newMap: Map[String, String]): String = "1"
+
+  override def getBizTime(newMap: Map[String, String]): String = "业务时间"
+
+  override def getUpdateTitle(newMap: Map[String, String]): String = "更新某地块公示"
+
+  override def getInsertTitle(newMap: Map[String, String]): String = "新增某地块公示"
+}
+
 
 case class company(equCols: Seq[String]) extends CompanyChangeHandle with Serializable {
-  override def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): (String, String, Map[String, String], String, String, String, String) = {
-    if (oldMap == null) {
-      (rowkey, "insert", newMap, "", s"新增一家公司", "1", "业务时间")
-    } else {
-      val t = getEquAndFields(oldMap, newMap)
-      if (t._1) {
-        null
-      } else {
-        (rowkey, "update", newMap,
-          t._2
-          , s"更新一家公司", "1", "业务时间")
-      }
-    }
-  }
+  override def getCid(rowkey: String, newMap: Map[String, String]): String = rowkey
+
+  override def getLabel(oldMap: Map[String, String], newMap: Map[String, String]): String = "1"
+
+  override def getBizTime(newMap: Map[String, String]): String = "业务时间"
+
+  override def getUpdateTitle(newMap: Map[String, String]): String = "更新一家公司"
+
+  override def getInsertTitle(newMap: Map[String, String]): String = "新增一家公司"
+}
+
+case class company_tm(equCols: Seq[String]) extends CompanyChangeHandle {
+  override def getUpdateTitle(newMap: Map[String, String]): String = getValueOrNull(newMap("tm_name"), s"${newMap("tm_name")}商标发生变更")
+
+  override def getInsertTitle(newMap: Map[String, String]): String = getValueOrNull(newMap("tm_name"), s"新增${newMap("tm_name")}商标")
+
+  override def getLabel(oldMap: Map[String, String], newMap: Map[String, String]): String = ChangeExtractUtils.get_ip_tags("商标", newMap("tm_name"), newMap("app_date"), newMap("reg_no"))
+
+  override def getBizTime(newMap: Map[String, String]): String = newMap("app_date")
+}
+
+// patent
+case class company_patent_list(equCols: Seq[String]) extends CompanyChangeHandle {
+  override def getUpdateTitle(newMap: Map[String, String]): String = getValueOrNull(newMap("title"), s"${newMap("title")}专利发生变更")
+
+  override def getInsertTitle(newMap: Map[String, String]): String = getValueOrNull(newMap("title"), s"新增${newMap("title")}专利")
+
+  override def getLabel(oldMap: Map[String, String], newMap: Map[String, String]): String = ChangeExtractUtils.get_ip_tags("专利", newMap("title"), newMap("app_date"), newMap("app_number"))
+
+  override def getBizTime(newMap: Map[String, String]): String = newMap("app_date")
 }
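Reviewer note: after this refactor, handle is implemented once in the trait and each table class only supplies the cid, titles, label and business time. A hedged usage sketch of the template-method shape with made-up sample data (a simplified re-statement, not the project classes):

    trait ChangeHandleSketch {
      def getInsertTitle(newMap: Map[String, String]): String
      def getBizTime(newMap: Map[String, String]): String

      // insert case only, mirroring handle(rowkey, null, newMap)
      def handleInsert(rowkey: String, newMap: Map[String, String]): (String, String, String, String) =
        (rowkey.split("_")(0), "insert", getInsertTitle(newMap), getBizTime(newMap))
    }

    case class TmSketch() extends ChangeHandleSketch {
      def getInsertTitle(m: Map[String, String]): String = s"新增${m("tm_name")}商标"
      def getBizTime(m: Map[String, String]): String = m("app_date")
    }

    object ChangeHandleSketchDemo {
      def main(args: Array[String]): Unit = {
        val row = TmSketch().handleInsert("123_abc", Map("tm_name" -> "WINHC", "app_date" -> "2020-07-01"))
        println(row) // (123,insert,新增WINHC商标,2020-07-01)
      }
    }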

+ 10 - 7
src/main/scala/com/winhc/bigdata/spark/jobs/chance/Inc_eci_debtor_relation.scala

@@ -18,7 +18,7 @@ import scala.collection.mutable
 /**
  * @Author: XuJiakai
  * @Date: 2020/7/13 16:32
- * @Description:
+ * @Description: incremental processing of company creditor-debtor relations
  */
 object Inc_eci_debtor_relation {
 
@@ -181,9 +181,9 @@
 
   }
 
-  val target_ads_creditor_info = "xjk_ads_creditor_info_test"
-  val target_ads_eci_debtor_relation = "xjk_ads_eci_debtor_relation_test"
-  val target_write_debtor_relation = "xjk_write_debtor_relation_test"
+  val target_ads_creditor_info = "ads_creditor_info"
+  val target_ads_eci_debtor_relation = "ads_eci_debtor_relation"
+  val target_write_debtor_relation = "ads_write_eci_debtor_relation"
 
   case class DebtorRelation(s: SparkSession, ds: String) extends LoggingUtils with BaseFunc with Logging {
     @(transient@getter) val spark: SparkSession = s
@@ -232,7 +232,7 @@ object Inc_eci_debtor_relation {
            |        ,'' as bg_emails
            |        ,CASE (zhixing_result = 2 OR( zhixing_result IS NULL AND curr_result = '胜')) WHEN TRUE THEN 0 ELSE 1 END AS deleted
            |        ,1 as flag
-           |FROM    winhc_eci_dev.inc_ods_creditor_info
+           |FROM    winhc_eci.inc_ods_creditor_info
            |LATERAL VIEW explode(company_split(ys_bg)) a AS ys_bg_xjk
            |LATERAL VIEW explode(company_split(ys_yg)) b AS ys_yg_xjk
            |WHERE   ds = $ds
@@ -364,7 +364,7 @@
            |                    ,row_number() OVER (PARTITION BY a.id,a.ys_yg_cid,a.ys_bg_cid ORDER BY flag DESC) c
            |            FROM    (
            |                        SELECT  ${cols.mkString(",")},0 as flag
-           |                        FROM    winhc_eci_dev.ads_creditor_info
+           |                        FROM    winhc_eci_dev.$target_ads_creditor_info
            |                        UNION ALL
            |                        SELECT  ${cols.mkString(",")},flag
            |                        FROM    inc_tmp_creditor_info
@@ -461,7 +461,10 @@
 
       val write_df = spark.createDataFrame(write_rdd, write_schema)
 
-      write_df.write.mode(if (isWindows) "append" else "overwrite").insertInto(s"winhc_eci_dev.$target_write_debtor_relation")
+      write_df
+        .write
+        .mode(if (isWindows) "append" else "overwrite")
+        .insertInto(s"winhc_eci.$target_write_debtor_relation")
 
     }
   }

+ 145 - 0
src/main/scala/com/winhc/bigdata/spark/jobs/chance/eci_good_news.scala

@@ -0,0 +1,145 @@
+package com.winhc.bigdata.spark.jobs.chance
+
+import com.winhc.bigdata.spark.config.EsConfig
+import com.winhc.bigdata.spark.udf.{BaseFunc, CaseChanceFunc}
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.SparkSession
+
+import scala.annotation.meta.getter
+import scala.collection.mutable
+
+/**
+ * @Author: XuJiakai
+ * @Date: 2020/7/15 17:25
+ * @Description: positive-news case chances
+ */
+object eci_good_news {
+
+  case class eci_good_news_handle(s: SparkSession, ds: String) extends LoggingUtils with BaseFunc with CaseChanceFunc with Logging {
+    @(transient@getter) val spark: SparkSession = s
+
+    val source_ads_change_extract = "ads_change_extract"
+
+    val target_ads_case_chance = "xjk_test_ads_case_chance"
+    val target_ads_case_chance_element = "xjk_test_ads_case_chance_element"
+
+    def company_ip(): Unit = {
+      cleanup()
+      json_utils()
+      json_add_kv()
+      map_2_json()
+      chance_dynamic_type()
+      val now_time = BaseUtil.atMonthsBefore(0)
+
+      val eci_debtor_rel_ds = getLastPartitionsOrElse("winhc_eci_dev.ads_eci_debtor_relation", "0")
+
+      val relation_cols = getColumns("winhc_eci_dev.ads_eci_debtor_relation").filter(!_.equals("ds"))
+
+      val good_news_cols = getColumns(s"winhc_eci_dev.$source_ads_change_extract").filter(!_.equals("ds"))
+
+      val df = sql(
+        s"""
+           |SELECT  *
+           |FROM    (
+           |            SELECT  ${relation_cols.map(n => s"$n as rel_$n").mkString(",")}
+           |            FROM    winhc_eci_dev.ads_eci_debtor_relation
+           |            WHERE   ds = '$eci_debtor_rel_ds'
+           |            and deleted = 0
+           |        ) AS t1
+           |JOIN (
+           |              SELECT  ${good_news_cols.map(n => s"$n as detail_$n").mkString(",")}
+           |              FROM    winhc_eci_dev.$source_ads_change_extract
+           |              WHERE   ds = '${ds}'
+           |              AND     TYPE = 'insert'
+           |              AND     months_between('$now_time',to_date(biz_date)) < 3
+           |          ) AS t2
+           |ON      t1.rel_bg_cid = t2.detail_cid
+           |""".stripMargin)
+      df
+        .cache()
+        .createOrReplaceTempView("good_news_debtor_relation_view")
+
+
+      sql(
+        s"""
+           |INSERT  OVERWRITE TABLE winhc_eci_dev.$target_ads_case_chance_element PARTITION(ds='$ds')
+           |SELECT  md5(cleanup(CONCAT_WS('',case_chance_id,case_chance_type,type,province,city,dynamic_time))) AS id
+           |        ,CASE_CHANCE_ID
+           |        ,TYPE
+           |        ,PROVINCE
+           |        ,city
+           |        ,county
+           |        ,dynamic_time
+           |        ,public_date
+           |        ,CASE_CHANCE_TYPE
+           |FROM    (
+           |            SELECT  *
+           |                    ,ROW_NUMBER() OVER(PARTITION BY CASE_CHANCE_ID,TYPE,PROVINCE,city ORDER BY CASE_CHANCE_ID) AS num
+           |            FROM    (
+           |                        SELECT  detail_rowkey AS CASE_CHANCE_ID
+           |                                ,0 AS TYPE
+           |                                ,3 AS CASE_CHANCE_TYPE
+           |                                ,rel_yg_province_code AS PROVINCE
+           |                                ,rel_yg_city_code AS city
+           |                                ,rel_yg_county_code AS county
+           |                                ,detail_biz_date AS dynamic_time
+           |                                ,detail_update_time AS public_date
+           |                        FROM    good_news_debtor_relation_view
+           |                        UNION ALL
+           |                        SELECT  detail_rowkey AS CASE_CHANCE_ID
+           |                                ,1 AS TYPE
+           |                                ,3 AS CASE_CHANCE_TYPE
+           |                                ,rel_bg_province_code AS PROVINCE
+           |                                ,rel_bg_city_code AS city
+           |                                ,rel_bg_county_code AS county
+           |                                ,detail_biz_date AS dynamic_time
+           |                                ,detail_update_time AS public_date
+           |                        FROM    good_news_debtor_relation_view
+           |                    )
+           |        ) AS t
+           |WHERE   t.num = 1
+           |""".stripMargin)
+
+      sql(
+        s"""
+           |
+           |INSERT OVERWRITE TABLE winhc_eci_dev.$target_ads_case_chance PARTITION(ds='$ds')
+           |SELECT  detail_rowkey AS case_chance_id
+           |        ,detail_title AS title
+           |        ,null AS plaintiff
+           |        ,null AS defendant
+           |        ,rel_bg_name AS company_name
+           |        ,detail_cid AS cid
+           |        ,json_add_str(detail_label,CONCAT_WS(',',get_json_kv('reg_capital',rel_bg_reg_capital),get_json_kv('province',rel_bg_province_code),get_json_kv('city',rel_bg_city_code),get_json_kv('county',rel_bg_county_code))) AS tags
+           |        ,detail_rowkey AS biz_id
+           |        ,3 AS type
+           |        ,get_chance_dynamic_type(detail_table_name) AS dynamic_type
+           |        ,map_2_json(detail_data) AS dynamic_content
+           |        ,detail_biz_date AS dynamic_time
+           |        ,detail_update_time AS public_date
+           |FROM    (
+           |            SELECT  *
+           |                    ,ROW_NUMBER() OVER(PARTITION BY detail_rowkey ORDER BY detail_rowkey) AS num
+           |            FROM    good_news_debtor_relation_view
+           |        ) AS t
+           |WHERE   t.num = 1
+           |""".stripMargin)
+
+    }
+  }
+
+
+  def main(args: Array[String]): Unit = {
+    //    val Array(ds) = args
+
+    val config = EsConfig.getEsConfigMap ++ mutable.Map(
+      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "10"
+    )
+    val spark = SparkUtils.InitEnv("eci_good_news", config)
+    eci_good_news_handle(spark, "20200707").company_ip()
+    spark.stop()
+  }
+
+}
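Reviewer note: both INSERT OVERWRITE statements above use the same ROW_NUMBER-over-partition trick to keep a single row per key before writing. A minimal standalone sketch of that dedup pattern; table and column values are illustrative sample data:

    import org.apache.spark.sql.SparkSession

    object RowNumberDedupSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[1]").appName("dedup-sketch").getOrCreate()
        import spark.implicits._

        Seq(("123_abc", "2020-07-01"), ("123_abc", "2020-07-02"), ("456_def", "2020-07-03"))
          .toDF("case_chance_id", "dynamic_time")
          .createOrReplaceTempView("good_news_view")

        // keep exactly one row per case_chance_id, as the job does before inserting
        spark.sql(
          """
            |SELECT case_chance_id, dynamic_time
            |FROM (
            |  SELECT *, ROW_NUMBER() OVER (PARTITION BY case_chance_id ORDER BY case_chance_id) AS num
            |  FROM good_news_view
            |) t
            |WHERE t.num = 1
            |""".stripMargin).show()

        spark.stop()
      }
    }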

+ 37 - 0
src/main/scala/com/winhc/bigdata/spark/udf/BaseFunc.scala

@@ -1,9 +1,12 @@
 package com.winhc.bigdata.spark.udf
 
+import com.winhc.bigdata.spark.utils.BaseUtil
 import org.apache.commons.lang3.StringUtils
 import org.apache.spark.sql.SparkSession
 
 import scala.annotation.meta.getter
+import org.json4s._
+import org.json4s.jackson.JsonMethods._
 
 /**
  * @Author: XuJiakai
@@ -14,6 +17,14 @@ trait BaseFunc {
   @(transient@getter) protected val spark: SparkSession
   private val pattern = "[^\\u4e00-\\u9fa5a-zA-Z \\(\\)().]+".r
 
+
+  def cleanup(): Unit = {
+    // strip special characters
+    spark.udf.register("cleanup", (col: String) => {
+      BaseUtil.cleanup(col)
+    })
+  }
+
   def tyc_split(): Unit = {
     spark.udf.register("tyc_split", (name: String) => {
       if (StringUtils.isEmpty(name)) {
@@ -24,6 +35,32 @@ trait BaseFunc {
     })
   }
 
+  def json_utils(): Unit = {
+    spark.udf.register("get_json_kv", (key: String, value: String) => {
+      if (StringUtils.isNotBlank(value)) {
+        "\"" + key + "\":\"" + value + "\""
+      } else {
+        "\"" + key + "\":" + value
+      }
+    })
+  }
+
+  def json_add_kv(): Unit = {
+    spark.udf.register("json_add_str", (json: String, addVal: String) => {
+      if (StringUtils.isNotBlank(json)) {
+        "{" + addVal + "," + json.substring(1)
+      } else {
+        "{" + addVal + "}"
+      }
+    })
+  }
+
+  def map_2_json(): Unit = {
+    spark.udf.register("map_2_json", (map: Map[String, String]) => {
+      compact(render(Extraction.decompose(map)(DefaultFormats)))
+    })
+  }
+
   def company_split(): Unit = {
     spark.udf.register("company_split", (name: String) => {
       if (StringUtils.isEmpty(name)) {
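Reviewer note: the new get_json_kv and json_add_str UDFs build JSON fragments by plain string concatenation. A small sketch of what they return for sample inputs, restated as ordinary functions so the behavior can be checked without a SparkSession (map_2_json is omitted since it just delegates to json4s):

    import org.apache.commons.lang3.StringUtils

    object JsonUdfSketch {
      def getJsonKv(key: String, value: String): String =
        if (StringUtils.isNotBlank(value)) "\"" + key + "\":\"" + value + "\""
        else "\"" + key + "\":" + value

      def jsonAddStr(json: String, addVal: String): String =
        if (StringUtils.isNotBlank(json)) "{" + addVal + "," + json.substring(1)
        else "{" + addVal + "}"

      def main(args: Array[String]): Unit = {
        println(getJsonKv("city", "330100"))                        // "city":"330100"
        println(getJsonKv("city", null))                            // "city":null
        println(jsonAddStr("""{"a":"1"}""", "\"city\":\"330100\"")) // {"city":"330100","a":"1"}
        println(jsonAddStr(null, "\"city\":\"330100\""))            // {"city":"330100"}
      }
    }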

+ 23 - 0
src/main/scala/com/winhc/bigdata/spark/udf/CaseChanceFunc.scala

@@ -0,0 +1,23 @@
+package com.winhc.bigdata.spark.udf
+
+import com.winhc.bigdata.spark.const.CaseChanceConst
+import org.apache.spark.sql.SparkSession
+
+import scala.annotation.meta.getter
+
+/**
+ * @Author: XuJiakai
+ * @Date: 2020/7/16 10:55
+ * @Description:
+ */
+trait CaseChanceFunc {
+
+  @(transient@getter) protected val spark: SparkSession
+
+  def chance_dynamic_type(): Unit = {
+    spark.udf.register("get_chance_dynamic_type", (tableName: String) => {
+      CaseChanceConst.CHANCE_DYNAMIC_TYPE(tableName)
+    })
+  }
+
+}

+ 12 - 17
src/main/scala/com/winhc/bigdata/spark/utils/ChangeExtractUtils.scala

@@ -1,33 +1,28 @@
 package com.winhc.bigdata.spark.utils
 
+import org.apache.commons.lang3.StringUtils
 /**
  * @Author: XuJiakai
  * @Date: 2020/7/7 13:59
  * @Description:
  */
-
-
 object ChangeExtractUtils {
 
-
-
-  // determines whether two maps are equal on the given keys; if not, the differing fields are returned
-  def getDoubleDataMap(iterable: Iterable[Map[String, String]]): (Map[String, String], Map[String, String]) = {
-    val map = iterable.map(m => (m("change_flag"), m)).toMap
-    (map("0"), map("1"))
+  // intellectual-property tag JSON
+  def get_ip_tags(type_val: String, name: String, date: String, no: String): String = {
+    s"""{"type":${getValueOrNull(type_val)},"name":${getValueOrNull(name)},"date":${getValueOrNull(date)},"no":${getValueOrNull(no)}}""".trim
   }
 
-  def getHandleClazz(tableName: String, equCols: Seq[String]): {def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): (String, String, Map[String, String], String, String, String, String)} = {
-    val clazz = s"com.winhc.bigdata.spark.jobs.chance.$tableName"
-    val foo = Class.forName(clazz)
-      .getConstructors.head.newInstance(Seq("a"))
-      .asInstanceOf[ {
-      def handle(rowkey: String, oldMap: Map[String, String], newMap: Map[String, String]): (String, String, Map[String, String], String, String, String, String)
-    }]
-    foo
+  private def getValueOrNull(value: String): String = {
+    if (StringUtils.isNotBlank(value)) {
+      "\"" + value + "\""
+    } else {
+      null
+    }
   }
 
+
   def main(args: Array[String]): Unit = {
+    val name = get_ip_tags("a", null, "b", null)
   }
-
 }
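Reviewer note: get_ip_tags emits a small JSON tag object, and blank fields are rendered as a bare null (the existing main exercises the null case). A runnable restatement with sample values; "WINHC" and the dates are illustrative only:

    import org.apache.commons.lang3.StringUtils

    object IpTagsSketch {
      private def getValueOrNull(value: String): String =
        if (StringUtils.isNotBlank(value)) "\"" + value + "\"" else null

      def get_ip_tags(type_val: String, name: String, date: String, no: String): String =
        s"""{"type":${getValueOrNull(type_val)},"name":${getValueOrNull(name)},"date":${getValueOrNull(date)},"no":${getValueOrNull(no)}}""".trim

      def main(args: Array[String]): Unit = {
        println(get_ip_tags("商标", "WINHC", "2020-07-16", "123456"))
        // {"type":"商标","name":"WINHC","date":"2020-07-16","no":"123456"}
        println(get_ip_tags("a", null, "b", null))
        // {"type":"a","name":null,"date":"b","no":null}
      }
    }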