
feat: 查老赖 (deadbeat lookup)

- feed 失信人 (dishonest-person) data into the 查老赖 upstream
- 查老赖 downstream output
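
Intended run order (a minimal sketch, mirroring the two main() entry points added below; the assumption that the upstream job is submitted before the downstream one is mine):

    // upstream: merge full + incremental dishonesty records into ads_deadbeat_person / ads_deadbeat_company
    val up = dishonest_info(spark, "winhc_eci_dev")
    up.person(); up.company()

    // downstream: aggregate per person / company and write the ads_deadbeat_*_out tables
    val down = deadbeat_info(spark, "winhc_eci_dev")
    down.reg_udf(); down.person(); down.company()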
许家凯 4 years ago
parent
commit
a0985dd18d

+ 445 - 0
src/main/scala/com/winhc/bigdata/spark/jobs/deadbeat/deadbeat_info.scala

@@ -0,0 +1,445 @@
+package com.winhc.bigdata.spark.jobs.deadbeat
+
+import com.winhc.bigdata.spark.udf.BaseFunc
+import com.winhc.bigdata.spark.utils.{DateUtils, LoggingUtils, SparkUtils}
+import org.apache.commons.lang3.StringUtils
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Row, SparkSession}
+
+import scala.annotation.meta.getter
+import scala.collection.mutable
+
+/**
+ * @Author: XuJiakai
+ * @Date: 2020/10/12 18:43
+ * @Description: downstream processing for 查失信 / 查老赖 (dishonest-person / deadbeat lookup)
+ */
+case class deadbeat_info(s: SparkSession,
+                         project: String //表所在工程名
+                        ) extends LoggingUtils with BaseFunc {
+  @(transient@getter) val spark: SparkSession = s
+
+  private val env = "dev"
+
+  private val filter_ele = Seq(
+    "company_dishonest_info"
+    , "company_dishonest_info_human"
+  )
+
+  private def is_con(s: String): Boolean = {
+    for (e <- filter_ele)
+      if (s.startsWith(e))
+        return true
+    false
+  }
+
+  private val m = Map("company_dishonest_info" -> "失信人"
+    , "company_dishonest_info_human" -> "失信人"
+  )
+
+  private val ids_m = Map("company_dishonest_info" -> "1"
+    , "company_dishonest_info_human" -> "2")
+
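+  /**
+   * UDAF that folds all dishonesty records of one subject into a single map.
+   * Input rows: (rowkey, tn, deleted, publish_date); only rows with deleted = '0' are accumulated.
+   * Output keys: ids ("typeId@@rowkey" list), labels, publish_date (latest kept value),
+   * and deleted, which becomes '1' when no live record remains.
+   */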
+  class person_agg_label extends UserDefinedAggregateFunction {
+
+    private def getMax(str1: String, str2: String): String = {
+      if (StringUtils.isEmpty(str1)) {
+        return str2
+      }
+      if (StringUtils.isEmpty(str2)) {
+        return str1
+      }
+      Seq(str1, str2).max
+    }
+
+    override def inputSchema: StructType = StructType(Array[StructField](
+      StructField("rowkey", DataTypes.StringType)
+      , StructField("tn", DataTypes.StringType)
+      , StructField("deleted", DataTypes.StringType)
+      , StructField("publish_date", DataTypes.StringType)
+    ))
+
+
+    override def bufferSchema: StructType = StructType(Array(
+      StructField("ids", ArrayType(StringType, containsNull = false))
+      , StructField("pub_date", StringType)
+    ))
+
+    override def dataType: DataType = DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType)
+
+    override def deterministic: Boolean = false
+
+    override def initialize(buffer: MutableAggregationBuffer): Unit = {
+      buffer.update(0, Seq.empty[String])
+      buffer.update(1, null)
+    }
+
+    override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+      val rowkey = input.getString(0)
+      val tn = input.getString(1)
+      val deleted = input.getString(2)
+      val publish_date = DateUtils.toMillisTimestamp(input.getString(3))
+      if ("0".equals(deleted)) { // null-safe comparison
+        buffer(0) = s"$tn@@$rowkey" +: buffer.getSeq[String](0)
+        buffer(1) = getMax(publish_date, buffer.getString(1))
+      }
+    }
+
+    override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+      buffer1(0) = buffer1.getSeq[String](0) ++ buffer2.getSeq[String](0)
+      buffer1(1) = getMax(buffer1.getString(1), buffer2.getString(1))
+    }
+
+    override def evaluate(buffer: Row): Any = {
+      val li = buffer.getSeq[String](0)
+      val publish_date = buffer.getString(1)
+      val ids = li
+        .filter(is_con)
+        .map(s => {
+          val strings = s.split("@@")
+          s"${ids_m(strings(0))}@@${strings(1)}"
+        }).mkString(",")
+
+      val labels = li.map(s => s.split("@@")(0)).map(s => m.getOrElse(s, null)).toSet.mkString(",")
+      if (StringUtils.isEmpty(ids)) {
+        Map(
+          "ids" -> null
+          , "deleted" -> "1"
+          , "labels" -> null
+          , "publish_date" -> null
+        )
+      } else {
+        Map(
+          "ids" -> ids
+          , "deleted" -> "0"
+          , "labels" -> labels
+          , "publish_date" -> publish_date
+        )
+      }
+    }
+  }
+
+  private def get_empty_map(rowkey: String, tn: String, deleted: String, publish_date: String): Map[String, String] = Map(
+    "ids" -> s"${ids_m(tn)}@@$rowkey"
+    , "deleted" -> deleted
+    , "labels" -> s"${m(tn)}"
+    , "publish_date" -> s"${DateUtils.toMillisTimestamp(publish_date)}"
+  )
+
+
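+  // 18-digit resident id layout: digits 7-10 = birth year, digit 17 = gender flag (odd = male, even = female)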
+  private def get_gender(card_num: String): Int = {
+    // guard against missing or malformed card numbers
+    if (StringUtils.isEmpty(card_num) || card_num.length < 17 || !card_num.charAt(16).isDigit) {
+      return -1
+    }
+    card_num.substring(16, 17).toInt % 2
+  }
+
+  private def get_birth_year(card_num: String): String =
+    if (StringUtils.isEmpty(card_num) || card_num.length < 10) null else card_num.substring(6, 10)
+
+  def init(): Unit = {
+    println(
+      s"""
+         |CREATE TABLE IF NOT EXISTS winhc_eci_dev.ads_deadbeat_person_out
+         |(
+         |    id  STRING COMMENT 'es id'
+         |    ,name STRING COMMENT '姓名'
+         |    ,card_num STRING COMMENT '身份证号'
+         |    ,birth_year STRING COMMENT '出生年'
+         |    ,gender STRING COMMENT '性别 -1未知 1男 0女'
+         |    ,province STRING COMMENT '省'
+         |    ,city STRING COMMENT '市'
+         |    ,district STRING COMMENT '区县'
+         |    ,ids STRING COMMENT '详情页id'
+         |    ,label STRING COMMENT '标签'
+         |    ,publish_date STRING COMMENT '最后一次公布时间'
+         |    ,deleted STRING   COMMENT '是否移除'
+         |)
+         |COMMENT 'TABLE COMMENT'
+         |PARTITIONED BY (ds STRING COMMENT '分区')
+         |""".stripMargin)
+
+    println(
+      s"""
+         |
+         |CREATE TABLE IF NOT EXISTS winhc_eci_dev.ads_deadbeat_company_out
+         |(
+         |    id STRING COMMENT 'es 索引id'
+         |    ,cid STRING COMMENT '公司cid'
+         |    ,name STRING COMMENT '公司名称'
+         |    ,card_num STRING COMMENT '组织机构代码'
+         |    ,legal_entity_id STRING COMMENT '法人id'
+         |    ,legal_entity_name STRING COMMENT '法人名字'
+         |    ,legal_entity_type STRING COMMENT '法人类型 公司或人'
+         |    ,reg_capital STRING COMMENT '注册资本,可视化字段'
+         |    ,reg_capital_amount STRING COMMENT '注册资本,整数到小数点后两位'
+         |    ,reg_capital_currency STRING COMMENT '注册资本单位'
+         |    ,estiblish_time STRING COMMENT '注册时间'
+         |    ,logo STRING COMMENT 'logo url'
+         |    ,province STRING COMMENT '省'
+         |    ,city STRING COMMENT '市'
+         |    ,district STRING COMMENT '区'
+         |    ,publish_date STRING COMMENT '最后一次公布时间'
+         |    ,ids STRING COMMENT '详情页id'
+         |    ,LABEL STRING COMMENT '标签'
+         |    ,deleted STRING COMMENT '是否移除'
+         |)
+         |COMMENT '查失信 查被执,企业表'
+         |PARTITIONED BY
+         |(
+         |    ds STRING COMMENT '分区'
+         |)
+         |""".stripMargin)
+  }
+
+
+  def reg_udf(): Unit = {
+    cleanup()
+    code2Name()
+    spark.udf.register("get_gender", get_gender _)
+    spark.udf.register("get_birth_year", get_birth_year _)
+    spark.udf.register("agg_label", new person_agg_label)
+    spark.udf.register("get_empty_map", get_empty_map _)
+
+    def toTime(str: String): String = DateUtils.toMillisTimestamp(str, pattern = "yyyy-MM-dd HH:mm:ss")
+
+    spark.udf.register("to_millis_timestamp", toTime _)
+  }
+
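+  // One output row per natural person: records with an id-card number are grouped by (name, card_num)
+  // and folded with agg_label; records without one keep a per-rowkey row built by get_empty_map.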
+  def person(): Unit = {
+    val target_tab = s"${getEnvProjectName(env, project)}.ads_deadbeat_person_out"
+    val org_tab = s"$project.ads_deadbeat_person"
+    val org_last_ds = getLastPartitionsOrElse(org_tab, "0")
+    val target_last_ds = getLastPartitionsOrElse(target_tab, "0")
+
+    sql(
+      s"""
+         |INSERT OVERWRITE TABLE $target_tab PARTITION(ds='$org_last_ds')
+         |SELECT  id
+         |        ,name
+         |        ,card_num
+         |        ,birth_year
+         |        ,gender
+         |        ,province
+         |        ,city
+         |        ,district
+         |        ,labels['ids'] as ids
+         |        ,labels['labels'] as label
+         |        ,labels['publish_date'] as publish_date
+         |        ,labels['deleted'] as deleted
+         |FROM    (
+         |            SELECT  md5(cleanup(CONCAT_WS('',card_num,name))) AS id
+         |                    ,name
+         |                    ,card_num
+         |                    ,get_birth_year(card_num) AS birth_year
+         |                    ,get_gender(card_num) AS gender
+         |                    ,get_province_name_pro(SUBSTRING(card_num,0,2)) AS province
+         |                    ,get_city_name(SUBSTRING(card_num,0,6)) AS city
+         |                    ,get_county_name(SUBSTRING(card_num,0,6)) AS district
+         |                    ,agg_label(rowkey,tn,deleted,publish_date) AS labels
+         |            FROM    winhc_eci_dev.ads_deadbeat_person
+         |            WHERE   ds > '$target_last_ds'
+         |            AND     card_num IS NOT NULL
+         |            GROUP BY name
+         |                     ,card_num
+         |            UNION ALL
+         |            SELECT  md5(cleanup(CONCAT_WS('',rowkey,name))) AS id
+         |                    ,name
+         |                    ,card_num
+         |                    ,NULL AS birth_year
+         |                    ,NULL AS gender
+         |                    ,NULL AS province
+         |                    ,NULL AS city
+         |                    ,NULL AS district
+         |                    ,get_empty_map(rowkey,tn,deleted,publish_date) AS labels
+         |            FROM    winhc_eci_dev.ads_deadbeat_person
+         |            WHERE   ds > '$target_last_ds'
+         |            AND     card_num IS NULL
+         |        )
+         |""".stripMargin)
+    //      .show(10000)
+
+  }
+
+
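+  // One output row per company: dedup ads_company/inc_ads_company by cid, fold dishonesty records per cid,
+  // then resolve legal_entity_name from human relations (legal_entity_type = 1) or companies (type = 2).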
+  def company(): Unit = {
+    val target_tab = s"${getEnvProjectName(env, project)}.ads_deadbeat_company_out"
+    val org_tab = s"$project.ads_deadbeat_company"
+    val org_last_ds = getLastPartitionsOrElse(org_tab, "0")
+    val target_last_ds = getLastPartitionsOrElse(target_tab, "0")
+
+    val company_last_ds = getLastPartitionsOrElse(s"$project.ads_company", "0")
+    val intersect_company_cols = getColumns(s"$project.ads_company").intersect(getColumns(s"$project.inc_ads_company"))
+
+    sql(
+      s"""
+         |SELECT  *
+         |FROM    (
+         |            SELECT  *
+         |                    ,ROW_NUMBER() OVER(PARTITION BY cid ORDER BY ds DESC ) AS num
+         |            FROM    (
+         |                        SELECT  ${intersect_company_cols.mkString(",")}
+         |                        FROM    winhc_eci_dev.ads_company
+         |                        WHERE   ds = '$company_last_ds'
+         |                        UNION ALL
+         |                        SELECT  ${intersect_company_cols.mkString(",")}
+         |                        FROM    winhc_eci_dev.inc_ads_company
+         |                        WHERE   ds > '$company_last_ds'
+         |                    ) AS t1
+         |        ) AS t2
+         |WHERE   t2.num = 1
+         |""".stripMargin)
+      .createOrReplaceTempView("tmp_company_all")
+
+    sql(
+      s"""
+         |SELECT  t2.cid as id
+         |        ,t2.cid
+         |        ,t3.name
+         |        ,t3.org_number AS card_num
+         |        ,t3.legal_entity_id
+         |        ,null as legal_entity_name
+         |        ,t3.legal_entity_type
+         |        ,t3.reg_capital
+         |        ,t3.reg_capital_amount
+         |        ,t3.reg_capital_currency
+         |        ,to_millis_timestamp(t3.estiblish_time) as estiblish_time
+         |        ,t3.logo
+         |        ,get_province_name(t3.area_code) as province
+         |        ,get_city_name(t3.area_code) as city
+         |        ,get_county_name(t3.area_code) as district
+         |        ,t2.publish_date
+         |        ,t2.ids
+         |        ,t2.label
+         |        ,t2.deleted
+         |FROM    (
+         |            SELECT  cid
+         |                    ,labels['deleted'] AS deleted
+         |                    ,labels['ids'] AS ids
+         |                    ,labels['labels'] AS label
+         |                    ,labels['publish_date'] AS publish_date
+         |            FROM    (
+         |                        SELECT  cid
+         |                                ,agg_label(rowkey,tn,deleted,publish_date) AS labels
+         |                        FROM    winhc_eci_dev.ads_deadbeat_company
+         |                        WHERE   ds > '$target_last_ds'
+         |                        AND     cid IS NOT NULL
+         |                        GROUP BY cid
+         |                    ) AS t1
+         |        ) AS t2
+         |JOIN    tmp_company_all AS t3
+         |ON      t2.cid = t3.cid
+         |""".stripMargin)
+      .createOrReplaceTempView("ads_deadbeat_company_out_tmp")
+
+
+    val human_last_ds = getLastPartitionsOrElse("winhc_eci_dev.ads_company_human_relation", "0")
+
+    sql(
+      s"""
+         |SELECT  rowkey
+         |        ,human_name
+         |FROM    (
+         |            SELECT  *
+         |                    ,ROW_NUMBER() OVER(PARTITION BY rowkey ORDER BY ds DESC ) AS num
+         |            FROM    (
+         |                        SELECT  rowkey
+         |                                ,human_name
+         |                                ,ds
+         |                        FROM    winhc_eci_dev.ads_company_human_relation
+         |                        WHERE   ds = '$human_last_ds'
+         |                        UNION ALL
+         |                        SELECT  rowkey
+         |                                ,human_name
+         |                                ,ds
+         |                        FROM    winhc_eci_dev.inc_ads_company_human_relation
+         |                        WHERE   ds > '$human_last_ds'
+         |                    ) AS t1
+         |        ) AS t2
+         |WHERE   t2.num = 1
+         |""".stripMargin)
+      .createOrReplaceTempView("human_all_tmp")
+
+    sql(
+      s"""
+         |INSERT OVERWRITE TABLE $target_tab PARTITION(ds='$org_last_ds')
+         |SELECT  t1.id
+         |        ,t1.cid
+         |        ,t1.name
+         |        ,t1.card_num
+         |        ,t1.legal_entity_id
+         |        ,t2.human_name AS legal_entity_name
+         |        ,t1.legal_entity_type
+         |        ,t1.reg_capital
+         |        ,t1.reg_capital_amount
+         |        ,t1.reg_capital_currency
+         |        ,t1.estiblish_time
+         |        ,t1.logo
+         |        ,t1.province
+         |        ,t1.city
+         |        ,t1.district
+         |        ,t1.publish_date
+         |        ,t1.ids
+         |        ,t1.LABEL
+         |        ,t1.deleted
+         |FROM    (
+         |            SELECT  *
+         |            FROM    ads_deadbeat_company_out_tmp
+         |            WHERE   legal_entity_type = 1
+         |        ) AS t1
+         |LEFT JOIN (
+         |              SELECT  *
+         |              FROM    human_all_tmp
+         |          ) AS t2
+         |ON      CONCAT_WS('_',t1.cid,t1.legal_entity_id) = t2.rowkey
+         |UNION ALL
+         |SELECT  t1.id
+         |        ,t1.cid
+         |        ,t1.name
+         |        ,t1.card_num
+         |        ,t1.legal_entity_id
+         |        ,t2.cname AS legal_entity_name
+         |        ,t1.legal_entity_type
+         |        ,t1.reg_capital
+         |        ,t1.reg_capital_amount
+         |        ,t1.reg_capital_currency
+         |        ,t1.estiblish_time
+         |        ,t1.logo
+         |        ,t1.province
+         |        ,t1.city
+         |        ,t1.district
+         |        ,t1.publish_date
+         |        ,t1.ids
+         |        ,t1.LABEL
+         |        ,t1.deleted
+         |FROM    (
+         |            SELECT  *
+         |            FROM    ads_deadbeat_company_out_tmp
+         |            WHERE   legal_entity_type = 2
+         |        ) AS t1
+         |LEFT JOIN (
+         |              SELECT  cid,name as cname
+         |              FROM    tmp_company_all
+         |          ) AS t2
+         |ON      t1.legal_entity_id = t2.cid
+         |""".stripMargin)
+    //      .show(1000)
+
+  }
+}
+
+object deadbeat_info {
+  def main(args: Array[String]): Unit = {
+    val config = mutable.Map(
+      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "10000"
+    )
+    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
+    val di = deadbeat_info(spark, "winhc_eci_dev")
+    di.reg_udf()
+    di.person()
+    di.company()
+    spark.stop()
+  }
+
+}

+ 187 - 0
src/main/scala/com/winhc/bigdata/spark/jobs/deadbeat/dishonest_info.scala

@@ -0,0 +1,187 @@
+package com.winhc.bigdata.spark.jobs.deadbeat
+
+import com.winhc.bigdata.spark.utils.{LoggingUtils, SparkUtils}
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.SparkSession
+
+import scala.annotation.meta.getter
+import scala.collection.mutable
+
+/**
+ * @Author: XuJiakai
+ * @Date: 2020/10/12 15:26
+ * @Description: dishonest-person data, companies + individuals, incremental + full load
+ */
+case class dishonest_info(s: SparkSession,
+                          project: String //表所在工程名
+                         ) extends LoggingUtils with Logging {
+  @(transient@getter) val spark: SparkSession = s
+
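+  // A record is live ("0") only when status + deleted == 0, i.e. both flags are 0; anything else counts as removed ("1").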
+  private def is_deleted(status: Int, deleted: Int): String = if ((status + deleted) == 0) "0" else "1"
+
+  private def calc(is_person: Boolean = true): Unit = {
+    val tn = is_person match {
+      case true => "company_dishonest_info_human"
+      case false => "company_dishonest_info"
+    }
+
+    val target_table = is_person match {
+      case true => s"$project.ads_deadbeat_person"
+      case false => s"$project.ads_deadbeat_company"
+    }
+    val inc_ads_table = s"$project.inc_ads_$tn"
+    val ads_table = s"$project.ads_$tn"
+
+    val intersect_cols = getColumns(inc_ads_table).intersect(getColumns(ads_table))
+    val inc_ads_last_ds = getLastPartitionsOrElse(inc_ads_table, "0")
+    val ads_last_ds = getLastPartitionsOrElse(ads_table, "0")
+    spark.udf.register("is_deleted", is_deleted _)
+
+    val view_fields =
+      s"""
+         |rowkey
+         |${
+        is_person match {
+          case true => ""
+          case false => ",cid"
+        }
+      }
+         |,name
+         |,card_num
+         |,pub_date as publish_date
+         |,is_deleted(status,deleted) as deleted
+         |""".stripMargin
+
+    def all(): Unit = {
+      sql(
+        s"""
+           |INSERT OVERWRITE TABLE $target_table PARTITION(ds='$inc_ads_last_ds',tn='$tn')
+           |SELECT  $view_fields
+           |FROM    (
+           |            SELECT  *
+           |                    ,ROW_NUMBER() OVER(PARTITION BY rowkey ORDER BY ds DESC ) AS num
+           |            FROM    (
+           |                        SELECT  ${intersect_cols.mkString(",")}
+           |                        FROM    $ads_table
+           |                        WHERE   ds = '$ads_last_ds'
+           |                        UNION ALL
+           |                        SELECT  ${intersect_cols.mkString(",")}
+           |                        FROM    $inc_ads_table
+           |                        WHERE   ds > '$ads_last_ds'
+           |                    ) AS t1
+           |        ) AS t2
+           |WHERE   t2.num = 1
+           |""".stripMargin)
+    }
+
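+    // Incremental load: read rows newer than last_ds, union them with previously written rows of the
+    // same subject (matched by name + card_num), and keep the newest row per rowkey.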
+    def inc(last_ds: String): Unit = {
+      val tmp_tab = s"tmp_deadbeat_$tn"
+      sql(
+        s"""
+           |SELECT  $view_fields
+           |        ,ds
+           |FROM    $inc_ads_table
+           |WHERE   ds > '$last_ds'
+           |""".stripMargin)
+        .cache()
+        .createOrReplaceTempView(tmp_tab)
+
+      sql(
+        s"""
+           |INSERT OVERWRITE TABLE $target_table PARTITION(ds='$inc_ads_last_ds',tn='$tn')
+           |SELECT  rowkey
+           |         ${
+          is_person match {
+            case true => ""
+            case false => ",cid"
+          }
+        }
+           |        ,name
+           |        ,card_num
+           |        ,publish_date
+           |        ,deleted
+           |FROM    (
+           |            SELECT  *
+           |                    ,ROW_NUMBER() OVER(PARTITION BY rowkey ORDER BY ds DESC ) AS num
+           |            FROM    (
+           |                        SELECT  rowkey
+           |                        ${
+          is_person match {
+            case true => ""
+            case false => ",cid"
+          }
+        }
+           |                                ,name
+           |                                ,card_num
+           |                                ,publish_date
+           |                                ,deleted
+           |                                ,ds
+           |                        FROM    $tmp_tab
+           |                        UNION ALL
+           |                        SELECT  t2.rowkey
+           |                        ${
+          is_person match {
+            case true => ""
+            case false => ",t2.cid"
+          }
+        }
+           |                                ,t2.name
+           |                                ,t2.card_num
+           |                                ,t2.publish_date
+           |                                ,t2.deleted
+           |                                ,t2.ds
+           |                        FROM    (
+           |                                    SELECT  DISTINCT concat_ws('_',name,card_num) AS d_id
+           |                                    FROM    $tmp_tab
+           |                                ) AS t1
+           |                        JOIN    (
+           |                                    SELECT  concat_ws('_',name,card_num) AS d_id
+           |                                            ,*
+           |                                    FROM    $target_table
+           |                                    WHERE   ds > '$last_ds'
+           |                                    AND     tn = '$tn'
+           |                                ) AS t2
+           |                        ON      t1.d_id = t2.d_id
+           |                    ) AS t3
+           |        ) AS t4
+           |WHERE   t4.num = 1
+           |""".stripMargin)
+
+    }
+
+    val ds_cols = sql(s"show partitions $target_table").collect()
+      .map(_.getString(0))
+      .filter(_.contains(tn))
+      .flatMap(_.split("/"))
+      .filter(_.contains("ds"))
+      .map(_.split("=")(1))
+    if (ds_cols.isEmpty) {
+      println("全量计算,失信人-个人")
+      all()
+    } else {
+      val max = ds_cols.max
+      println(s"增量计算:$max,失信人-个人")
+      inc(max)
+    }
+  }
+
+  def person(): Unit = calc()
+
+  def company(): Unit = calc(false)
+
+}
+
+object dishonest_info {
+  def main(args: Array[String]): Unit = {
+    val config = mutable.Map(
+      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "1000"
+    )
+    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
+    val di = dishonest_info(spark, "winhc_eci_dev")
+    di.person()
+    di.company()
+
+    spark.stop()
+  }
+}

+ 14 - 2
src/main/scala/com/winhc/bigdata/spark/udf/BaseFunc.scala

@@ -65,14 +65,21 @@ trait BaseFunc {
       (r.getString(0), Seq(r.getString(1), r.getString(2), r.getString(3)))
     }).toMap)
 
-    val areaCode2Name = spark.sparkContext.broadcast(spark.sql(
+    val code_df = spark.sql(
       s"""
          |select area_code,province,city,district
          |from winhc_eci_dev.ods_area_code where ds = '20200604'
-      """.stripMargin).collect().map(r => {
+      """.stripMargin).collect()
+
+    val areaCode2Name = spark.sparkContext.broadcast(code_df.map(r => {
       (r.getString(0), Seq(r.getString(1), r.getString(2), r.getString(3)))
     }).toMap)
 
+    val province_code = spark.sparkContext.broadcast(code_df.map(r => {
+      (r.getString(0).substring(0, 2), r.getString(1))
+    }).toMap)
+
+
     spark.udf.register("get_category_first", (code: String) => {
       CompanyIndexSave2EsHelper.get_seq_by_index(categoryCode2Name, code, 0)
     })
@@ -86,6 +93,11 @@ trait BaseFunc {
     spark.udf.register("get_province_name", (code: String) => {
       CompanyIndexSave2EsHelper.get_seq_by_index(areaCode2Name, code, 0)
     })
+
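+    // Unlike get_province_name (full 6-digit area code), this maps a bare 2-digit province prefix,
+    // e.g. the first two digits of an id-card number, straight to the province name.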
+    spark.udf.register("get_province_name_pro", (code: String) => {
+      province_code.value.getOrElse(code, null)
+    })
+
     spark.udf.register("get_city_name", (code: String) => {
       CompanyIndexSave2EsHelper.get_seq_by_index(areaCode2Name, code, 1)
     })