Pārlūkot izejas kodu

feat: 身份证号格式统一

- 添加身份证号统一提取函数
- 身份证号校验规则兼容新格式
许家凯 4 gadi atpakaļ
vecāks
revīzija
aa36f6559f

+ 5 - 1
src/main/scala/com/winhc/bigdata/spark/udf/BaseFunc.scala

@@ -21,7 +21,7 @@ trait BaseFunc {
   @(transient@getter) protected val spark: SparkSession
   private val pattern = "[^\\u4e00-\\u9fa5a-zA-Z \\(\\)().]+".r
 
-  private val id_card_pattern = "^[1-9]\\d{5}(18|19|20)\\d{2}((0[1-9])|(1[0-2])|\\*{2})(([0-2][1-9])|10|20|30|31|\\*{2})\\d{3}[0-9Xx]$".r
+  // Anchored pattern for ID-card numbers, in order:
+  //   - 6 digits, the first one non-zero
+  //   - a 4-digit year starting with 18, 19 or 20
+  //   - month: 01-12, or masked as `**`, or a leading 0/1 followed by one or two `*`
+  //   - day: [0-2][1-9], 10, 20, 30, 31, or masked as `**`
+  //   - 3 digits followed by a final digit or X/x
+  private val id_card_pattern = "^[1-9]\\d{5}(18|19|20)\\d{2}((0[1-9])|(1[0-2])|([0-1]\\*{1,2})|\\*{2})(([0-2][1-9])|10|20|30|31|\\*{2})\\d{3}[0-9Xx]$".r
 
 
   /* def to_epoch_millis_timestamp(): Unit = {
@@ -34,6 +34,10 @@ trait BaseFunc {
     spark.udf.register("case_no_trim", case_no_trim _)
   }
 
+  /** Registers `id_card_trim` on the session's UDF registry under the name `id_card_trim`. */
+  def id_card_trim_udf(): Unit = {
+    val normalize: String => String = id_card_trim _
+    spark.udf.register("id_card_trim", normalize)
+  }
+
   /**
    * Registers the UDF `is_id_card`, which reports whether its string argument
    * matches `id_card_pattern`.
    *
    * A null argument yields false rather than a NullPointerException from the
    * regex matcher.
    */
   def is_id_card(): Unit = {
     spark.udf.register("is_id_card", (str: String) => str != null && id_card_pattern.matches(str))
   }

+ 14 - 0
src/main/scala/com/winhc/bigdata/spark/utils/BaseUtil.scala

@@ -256,8 +256,22 @@ object BaseUtil {
     } else null
   }
 
+  /**
+   * Normalizes an ID-card number into one masked format.
+   *
+   * Keeps the first 10 and the last 4 characters of the input, joins them with
+   * a fixed `****` mask and upper-cases the result (so a trailing `x` becomes
+   * `X`).
+   *
+   * @param str raw ID-card number; may be null or blank
+   * @return the masked, upper-cased value, or null when `str` is null, blank,
+   *         or shorter than 14 characters (too short to hold a 10-character
+   *         prefix and a 4-character suffix without overlapping)
+   */
+  def id_card_trim(str: String): String = {
+    val prefixLen = 10
+    val suffixLen = 4
+    // Check the length before slicing: substring(0, 10) throws on shorter input.
+    if (StringUtils.isNotBlank(str) && str.length >= prefixLen + suffixLen) {
+      s"${str.substring(0, prefixLen)}****${str.substring(str.length - suffixLen)}".toUpperCase
+    } else {
+      null
+    }
+  }
+
   /** Manual smoke check: prints the result of each normalizer for one sample input. */
   def main(args: Array[String]): Unit = {
     val caseNoSample = "(2015)怀执字第03601号号"
     println(case_no_trim(caseNoSample))
+    val idCardSample = "41111119990****062x"
+    println(id_card_trim(idCardSample))
   }
 
 }