Browse Source

fix: 身份证出生日期加入时间范围判断

许家凯 4 years ago
parent
commit
1f67c4a67a

+ 10 - 7
src/main/scala/com/winhc/bigdata/spark/jobs/deadbeat/deadbeat_info.scala

@@ -30,7 +30,6 @@ case class deadbeat_info(s: SparkSession,
   )
 
   private def is_con(s: String): Boolean = {
-
     for (e <- filter_ele)
       if (s.split("@@")(0).equals(e))
         return true
@@ -148,7 +147,6 @@ case class deadbeat_info(s: SparkSession,
     if (StringUtils.isEmpty(card_num)) {
       return -1
     }
-
     card_num.substring(16, 17).toInt % 2
   }
 
@@ -376,7 +374,8 @@ case class deadbeat_info(s: SparkSession,
          |                    ) AS t3
          |        ) AS t4
          |WHERE   t4.num = 1
-         |""".stripMargin).createOrReplaceTempView("all_deadbeat_tmp_company_tmp")
+         |""".stripMargin)
+      .createOrReplaceTempView("all_deadbeat_tmp_company_tmp")
 
     sql(
       s"""
@@ -508,7 +507,6 @@ case class deadbeat_info(s: SparkSession,
          |          ) AS t2
          |ON      legal_entity_id = t2.cid
          |""".stripMargin)
-    //      .show(1000)
 
   }
 }
@@ -523,21 +521,26 @@ object deadbeat_info {
     val di = deadbeat_info(spark, "winhc_eci_dev")
     di.reg_udf()
     AsyncExtract.startAndWait(spark, Seq(
-      ("前置处理。。。", () => {
+      ("前置处理。。。", () => {
         di.personPre()
+        true
+      })
+      , ("企业前置处理。。。", () => {
         di.companyPre()
         true
       })
     ))
 
     AsyncExtract.startAndWait(spark, Seq(
-      ("下游处理。。。", () => {
+      ("下游处理。。。", () => {
         di.person()
+        true
+      })
+      , ("企业下游处理。。。", () => {
         di.company()
         true
       })
     ))
     spark.stop()
   }
-
 }

+ 15 - 6
src/main/scala/com/winhc/bigdata/spark/udf/BaseFunc.scala

@@ -21,7 +21,7 @@ trait BaseFunc {
   @(transient@getter) protected val spark: SparkSession
   private val pattern = "[^\\u4e00-\\u9fa5a-zA-Z \\(\\)().]+".r
 
-  private val id_card_pattern = "^[1-9]\\d{5}(18|19|20)\\d{2}((0[1-9])|(1[0-2])|([0-1]\\*{1,2})|\\*{2})(([0-2][1-9])|10|20|30|31|\\*{2})\\d{3}[0-9Xx]$".r
+  private val id_card_pattern = "^[1-9]\\d{5}(19|20)\\d{2}((0[1-9])|(1[0-2])|([0-1]\\*{1,2})|\\*{2})(([0-2][1-9])|10|20|30|31|\\*{2})\\d{3}[0-9Xx]$".r
 
 
   /* def to_epoch_millis_timestamp(): Unit = {
@@ -42,14 +42,23 @@ trait BaseFunc {
   // Registers the id_card_trimOrRaw method as the Spark SQL UDF named "id_card_trimOrRaw".
   def id_card_trimOrRaw_udf(): Unit = {
     spark.udf.register("id_card_trimOrRaw", id_card_trimOrRaw _)
   }
+
   // Masks an ID-card number. When `str` is non-blank and matches id_card_pattern, the result is
   // its first 10 characters, then "****", then its last 4 characters, upper-cased.
   // Any other input (including blank input) is returned unchanged.
   def id_card_trimOrRaw(str: String): String = {
-      if (StringUtils.isNotBlank(str) && (id_card_pattern matches str)) {
-        return s"${str.substring(0, 10)}****${str.substring(str.length - 4)}".toUpperCase
-      }
-      str
+    if (StringUtils.isNotBlank(str) && (id_card_pattern matches str)) {
+      return s"${str.substring(0, 10)}****${str.substring(str.length - 4)}".toUpperCase
+    }
+    str
   }
+
   def is_id_card(): Unit = {
-    spark.udf.register("is_id_card", (str: String) => id_card_pattern matches str)
+    // Registers the Spark SQL UDF "is_id_card": true only when the input matches
+    // id_card_pattern AND its 4-digit birth year (characters 6..9) is not after maxYear.
+    // maxYear is evaluated once, when the UDF is registered, and captured by the closure.
+    // NOTE(review): BaseUtil.nowDate(pattern = "yyyy") presumably yields the current year - confirm.
+    val maxYear = BaseUtil.nowDate(pattern = "yyyy").toInt
+    spark.udf.register("is_id_card", (str: String) => {
+      if (id_card_pattern matches str) {
+        // The pattern guarantees characters 6..9 are digits ((19|20)\d{2}), so toInt is safe here.
+        val d = str.substring(6, 10).toInt
+        // No `return` here: inside a lambda it is a non-local return from is_id_card(), which
+        // has already finished by the time the UDF runs, so it would throw instead of yielding.
+        d <= maxYear
+      } else
+        false
+    })
   }
 
   def code2Name(): (Broadcast[Map[String, Seq[String]]], Broadcast[Map[String, Seq[String]]]) = {