Ver Fonte

专利增加rowkey规则

xufei há 4 anos atrás
pai
commit
ebb3ae4f38

+ 6 - 1
src/main/scala/com/winhc/bigdata/spark/udf/CompanyMapping.scala

@@ -5,10 +5,11 @@ import org.apache.commons.lang3.StringUtils
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.sql.SparkSession
 import com.winhc.bigdata.spark.utils.BaseUtil._
+import com.winhc.bigdata.spark.utils.RowkeyRuleUtils._
 
 trait CompanyMapping {
 
-  def prepareFunctions(spark: SparkSession): Unit ={
+  def prepareFunctions(spark: SparkSession): Unit = {
     import spark._
     //清理特殊字符
     spark.udf.register("cleanup", (col: String) => {
@@ -22,6 +23,10 @@ trait CompanyMapping {
     spark.udf.register("case_no", (col: String) => {
       caseNo(col)
     })
+
+    spark.udf.register("rowkey_trans", (col: String, tab: String) => {
+      rowkey_trans(col, tab)
+    })
   }
 
   def prepare(spark: SparkSession): Unit = {

+ 19 - 4
src/main/scala/com/winhc/bigdata/spark/utils/CompanyIncrForCidsUtils.scala

@@ -23,7 +23,7 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
 
   val tabMapping =
     Map("company_court_open_announcement" -> ("litigant_cids", ";") //开庭公告
-      ,"company_send_announcement" -> ("litigant_cids",",")//送达公告
+      , "company_send_announcement" -> ("litigant_cids", ",") //送达公告
     )
 
   val funMap =
@@ -32,6 +32,12 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
       "company_court_announcement.litigant" -> "replace_char(litigant)"
     )
 
+  val rowkey_mapping: Map[String, String] =
+    Map(
+      "company_patent.pub_number" -> s"rowkey_trans(pub_number,'$mainTableName')",//专利
+      "company_patent.app_number" -> s"rowkey_trans(app_number,'$mainTableName')"
+    )
+
   //转换字段
   def trans(s: String): String = {
     val key = mainTableName + "." + s
@@ -42,6 +48,15 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
     res
   }
 
+  def trans2(s: String) = {
+    val key = mainTableName + "." + s
+    var res = s
+    if (rowkey_mapping.contains(key)) {
+      res = rowkey_mapping(key)
+    }
+    res
+  }
+
   def calc(): Unit = {
     println(s"${this.getClass.getSimpleName} calc start! " + new Date().toString)
 
@@ -88,7 +103,7 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
       runDs = BaseUtil.atDaysAfter(1, lastDsIncAds)
     }
 
-    val cols_md5 = dupliCols.filter(!_.equals("new_cid")).map(trans)
+    val cols_md5 = dupliCols.filter(!_.equals("new_cid")).map(trans).map(trans2)
 
     //增量ods和增量ads最后一个分区相等,跳出
     if (lastDsIncOds.equals(lastDsIncAds)) {
@@ -97,7 +112,7 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
       val l1 = sql(s"show partitions $inc_ads_company_tb").collect.toList.map(_.getString(0).split("=")(1)).sorted
       if (l1.size > 1) {
         runDs = BaseUtil.atDaysAfter(1, l1(l1.size - 2))
-      }else{
+      } else {
         runDs = firstDsIncOds
       }
       //sys.exit(-1)
@@ -159,7 +174,7 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
          |                    ,new_cid
          |                    ,cid
          |                    ,${sublistTableFieldName.mkString(",")}
-         |                    ,ROW_NUMBER() OVER (PARTITION BY cleanup(CONCAT_WS('',${dupliCols.map(trans).mkString(",")})) ORDER BY update_time DESC ) num
+         |                    ,ROW_NUMBER() OVER (PARTITION BY cleanup(CONCAT_WS('',${dupliCols.map(trans).map(trans2)mkString(",")})) ORDER BY update_time DESC ) num
          |            FROM    (
          |                        SELECT  "0" AS $f
          |                                ,CAST(new_cid AS STRING) AS new_cid

+ 55 - 0
src/main/scala/com/winhc/bigdata/spark/utils/RowkeyRuleUtils.scala

@@ -0,0 +1,55 @@
+package com.winhc.bigdata.spark.utils
+
+import cn.hutool.crypto.SecureUtil
+import org.apache.commons.lang3.StringUtils
+
+/**
+ * @Description:主键规则字段修改
+ * @author π
+ * @date 2020/9/49:28
+ */
+object RowkeyRuleUtils {
+  val md5Map =
+    Map("company_patent" -> "1"
+    )
+
+  val reg = "^CN.*".r
+
+  def rowkey_trans(s: String, name: String): String = {
+    var res = ""
+    if (md5Map.contains(name)) {
+      res = md5Map(name)
+    }
+    val r = res match {
+      case "1" => patent(s)
+      case _ => s
+    }
+    r
+  }
+
+  //专利规则
+  def patent(s: String): String = {
+    if (StringUtils.isBlank(s)) {
+      return ""
+    }
+    var r = s
+    val flag = reg.pattern.matcher(s.toUpperCase()).matches()
+    if (flag) {
+      r = s.substring(2)
+    }
+    r
+  }
+
+  def main(args: Array[String]): Unit = {
+
+    val r1 ="CN211281060U"
+    val r2 ="2019218036260"
+    val re1 = rowkey_trans(r1, "company_patent")
+    val re2 = rowkey_trans(r2, "company_patent")
+    val md5 = SecureUtil.md5(re1 + re2)
+    val md52 = SecureUtil.md5(r1 + r2)
+    println(md5)
+    println(md52)
+    println(re1 + " " + re2)
+  }
+}