|
@@ -1,7 +1,6 @@
|
|
package com.winhc.bigdata.spark.udf
|
|
package com.winhc.bigdata.spark.udf
|
|
|
|
|
|
import com.winhc.bigdata.spark.implicits.CompanyIndexSave2EsHelper
|
|
import com.winhc.bigdata.spark.implicits.CompanyIndexSave2EsHelper
|
|
-import com.winhc.bigdata.spark.implicits.RegexUtils._
|
|
|
|
import com.winhc.bigdata.spark.utils.BaseUtil
|
|
import com.winhc.bigdata.spark.utils.BaseUtil
|
|
import com.winhc.bigdata.spark.utils.BaseUtil._
|
|
import com.winhc.bigdata.spark.utils.BaseUtil._
|
|
import org.apache.commons.lang3.StringUtils
|
|
import org.apache.commons.lang3.StringUtils
|
|
@@ -21,7 +20,6 @@ trait BaseFunc {
|
|
@(transient@getter) protected val spark: SparkSession
|
|
@(transient@getter) protected val spark: SparkSession
|
|
private val pattern = "[^\\u4e00-\\u9fa5a-zA-Z \\(\\)().]+".r
|
|
private val pattern = "[^\\u4e00-\\u9fa5a-zA-Z \\(\\)().]+".r
|
|
|
|
|
|
- private val id_card_pattern = "^[1-9]\\d{5}(19|20)\\d{2}((0[1-9])|(1[0-2])|([0-1]\\*{1,2})|\\*{2})(([0-2][1-9])|10|20|30|31|\\*{2})\\d{3}[0-9Xx]$".r
|
|
|
|
|
|
|
|
|
|
|
|
/* def to_epoch_millis_timestamp(): Unit = {
|
|
/* def to_epoch_millis_timestamp(): Unit = {
|
|
@@ -38,27 +36,8 @@ trait BaseFunc {
|
|
spark.udf.register("id_card_trim", id_card_trim _)
|
|
spark.udf.register("id_card_trim", id_card_trim _)
|
|
}
|
|
}
|
|
|
|
|
|
- //身份证号格式统一:若判断是身份证则修剪一下,否则原样返回
|
|
|
|
- def id_card_trimOrRaw_udf(): Unit = {
|
|
|
|
- spark.udf.register("id_card_trimOrRaw", id_card_trimOrRaw _)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- def id_card_trimOrRaw(str: String): String = {
|
|
|
|
- if (StringUtils.isNotBlank(str) && (id_card_pattern matches str)) {
|
|
|
|
- return s"${str.substring(0, 10)}****${str.substring(str.length - 4)}".toUpperCase
|
|
|
|
- }
|
|
|
|
- str
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- def is_id_card(): Unit = {
|
|
|
|
- val maxYear = BaseUtil.nowDate(pattern = "yyyy").toInt
|
|
|
|
- spark.udf.register("is_id_card", (str: String) => {
|
|
|
|
- if (id_card_pattern matches str) {
|
|
|
|
- val d = str.substring(6, 10).toInt
|
|
|
|
- return d <= maxYear
|
|
|
|
- } else
|
|
|
|
- false
|
|
|
|
- })
|
|
|
|
|
|
+ def is_id_card_udf(): Unit = {
|
|
|
|
+ spark.udf.register("is_id_card", is_id_card _)
|
|
}
|
|
}
|
|
|
|
|
|
def code2Name(): (Broadcast[Map[String, Seq[String]]], Broadcast[Map[String, Seq[String]]]) = {
|
|
def code2Name(): (Broadcast[Map[String, Seq[String]]], Broadcast[Map[String, Seq[String]]]) = {
|