|
@@ -21,7 +21,7 @@ trait BaseFunc {
|
|
|
 // Session on which the UDFs of this trait are registered (via `spark.udf.register`).
 // Declared without a value here, so the implementing class has to provide it.
 @(transient@getter) protected val spark: SparkSession
|
|
|
 // Matches every run of one or more characters that fall OUTSIDE the allowed set:
 // code points U+4E00..U+9FA5, ASCII letters, the space character, parentheses and '.'.
 private val pattern = "[^\\u4e00-\\u9fa5a-zA-Z \\(\\)().]+".r
|
|
|
|
|
|
- private val id_card_pattern = "^[1-9]\\d{5}(18|19|20)\\d{2}((0[1-9])|(1[0-2])|([0-1]\\*{1,2})|\\*{2})(([0-2][1-9])|10|20|30|31|\\*{2})\\d{3}[0-9Xx]$".r
|
|
|
// Accepted ID-card shape, anchored at both ends:
//   - a leading digit 1-9 followed by five digits,
//   - a four-digit year starting with 19 or 20,
//   - a month 01-12, or a masked month (0/1 followed by one or two '*', or "**"),
//   - a day ([0-2][1-9], 10, 20, 30, 31) or the mask "**",
//   - three digits and a final digit or X/x.
+ private val id_card_pattern = "^[1-9]\\d{5}(19|20)\\d{2}((0[1-9])|(1[0-2])|([0-1]\\*{1,2})|\\*{2})(([0-2][1-9])|10|20|30|31|\\*{2})\\d{3}[0-9Xx]$".r
|
|
|
|
|
|
|
|
|
/* def to_epoch_millis_timestamp(): Unit = {
|
|
@@ -42,14 +42,23 @@ trait BaseFunc {
|
|
|
def id_card_trimOrRaw_udf(): Unit = {
|
|
|
spark.udf.register("id_card_trimOrRaw", id_card_trimOrRaw _)
|
|
|
}
|
|
|
+
|
|
|
def id_card_trimOrRaw(str: String): String = {
|
|
|
- if (StringUtils.isNotBlank(str) && (id_card_pattern matches str)) {
|
|
|
- return s"${str.substring(0, 10)}****${str.substring(str.length - 4)}".toUpperCase
|
|
|
- }
|
|
|
- str
|
|
|
+ if (StringUtils.isNotBlank(str) && (id_card_pattern matches str)) {
|
|
|
+ return s"${str.substring(0, 10)}****${str.substring(str.length - 4)}".toUpperCase
|
|
|
+ }
|
|
|
+ str
|
|
|
}
|
|
|
+
|
|
|
def is_id_card(): Unit = {
|
|
|
- spark.udf.register("is_id_card", (str: String) => id_card_pattern matches str)
|
|
|
+ val maxYear = BaseUtil.nowDate(pattern = "yyyy").toInt
|
|
|
+ spark.udf.register("is_id_card", (str: String) => {
|
|
|
+ if (id_card_pattern matches str) {
|
|
|
+ val d = str.substring(6, 10).toInt
|
|
|
+ return d <= maxYear
|
|
|
+ } else
|
|
|
+ false
|
|
|
+ })
|
|
|
}
|
|
|
|
|
|
def code2Name(): (Broadcast[Map[String, Seq[String]]], Broadcast[Map[String, Seq[String]]]) = {
|