Browse Source

司法案件456维度将身份证补全移到外部(单独)

晏永年 4 years ago
parent
commit
3741047b84

+ 2 - 195
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelationPre456.scala

@@ -60,7 +60,7 @@ case class JudicialCaseRelationPre456(s: SparkSession, project: String
          |      select
          |      md5(cleanup(case_no)) as judicase_id
          |      ,"4" as flag
-         |      ,concat_ws('',plaintiff,'与',defendant,case_reason) as title
+         |      ,title(plaintiff,defendant,'送达公告') as title
          |      ,concat_ws('',case_type(case_no)) as case_type
          |      ,case_reason
          |      ,case_no
@@ -134,200 +134,7 @@ case class JudicialCaseRelationPre456(s: SparkSession, project: String
          |where num = 1
          |""".stripMargin).show(10, false)
     //限制高消费预处理(个人)
-    val columns: Seq[String] = spark.table(s"$project.inc_ads_company_zxr_restrict_person").schema.map(_.name).filter(_!="flag")
     lastDsIncAds = BaseUtil.getPartion(s"$project.inc_ads_company_zxr_restrict_person", spark)
-    spark.sparkContext.setJobDescription(s"处理zxr_restrict_person($lastDsIncAds)")
-    //1、先从被执行人中用name和case_no关联补全身份证号码
-    sql(
-      s"""
-         |--先从company_zxr_restrict_person存量、增量表获取
-         |SELECT rowkey
-         |    ,IF(A.identity_num IS NULL AND B.card IS NOT NULL,1,0) AS flag
-         |    ,new_cid
-         |    ,cid
-         |    ,id
-         |    ,name_hid
-         |    ,name
-         |    ,sex
-         |    ,COALESCE(A.identity_num,B.card) AS identity_num
-         |    ,court_name
-         |    ,court_code
-         |    ,case_create_time
-         |    ,A.case_no AS case_no
-         |    ,content
-         |    ,oss_path
-         |    ,file_path
-         |    ,province
-         |    ,xgid
-         |    ,company_name
-         |    ,company_info
-         |    ,source
-         |    ,status
-         |    ,A.appro_time AS appro_time
-         |    ,A.create_time AS create_time
-         |    ,A.update_time AS update_time
-         |    ,A.deleted AS deleted
-         |FROM(
-         |    SELECT *
-         |    FROM(
-         |        SELECT *
-         |        FROM(
-         |            SELECT *
-         |            ,ROW_NUMBER() OVER (PARTITION BY cleanup(CONCAT_WS('',name,case_no)) ORDER BY update_time DESC ) num
-         |            FROM(
-         |                SELECT ${columns.mkString(",")}
-         |                FROM $project.ads_company_zxr_restrict_person
-         |                WHERE ds>'0'
-         |                UNION ALL
-         |                SELECT ${columns.mkString(",")}
-         |                FROM $project.inc_ads_company_zxr_restrict_person
-         |                WHERE ds>'0'
-         |            )
-         |        )
-         |        WHERE num=1
-         |    )
-         | ) A
-         |LEFT JOIN
-         |(
-         |    SELECT *
-         |    FROM(
-         |        SELECT *
-         |        ,ROW_NUMBER() OVER (PARTITION BY cleanup(CONCAT_WS('',cname,case_no)) ORDER BY update_time DESC ) num
-         |        FROM (
-         |            SELECT cname,card,case_no,type,update_time
-         |            FROM $project.ods_company_zxr
-         |            WHERE ds>'0'
-         |            UNION ALL
-         |            SELECT cname,card,case_no,type,update_time
-         |            FROM $project.inc_ods_company_zxr
-         |            WHERE ds>'0'
-         |        )
-         |    )
-         |    WHERE num=1 AND type='1'
-         |) B
-         |ON A.name=B.cname AND A.case_no=B.case_no
-         |""".stripMargin
-    ).createOrReplaceTempView("tmp_person_cloze_1")
-    //2、再从失信人中用name和case_no关联补全身份证号码
-    sql(
-      s"""
-         |--再从前面结果表获取
-         |SELECT rowkey
-         |    ,IF(A.identity_num IS NULL AND C.card_num IS NOT NULL,2,A.flag) AS flag
-         |    ,A.new_cid
-         |    ,A.cid
-         |    ,A.id
-         |    ,name_hid
-         |    ,A.name
-         |    ,A.sex
-         |    ,COALESCE(A.identity_num,C.card_num) AS identity_num
-         |    ,A.court_name
-         |    ,A.court_code
-         |    ,A.case_create_time
-         |    ,A.case_no
-         |    ,A.content
-         |    ,A.oss_path
-         |    ,A.file_path
-         |    ,A.province
-         |    ,A.xgid
-         |    ,A.company_name
-         |    ,A.company_info
-         |    ,A.source
-         |    ,A.status
-         |    ,A.appro_time
-         |    ,A.create_time
-         |    ,A.update_time
-         |    ,A.deleted
-         |FROM tmp_person_cloze_1 A
-         |LEFT JOIN
-         |(
-         |    SELECT *
-         |    FROM(
-         |        SELECT *
-         |        ,ROW_NUMBER() OVER (PARTITION BY cleanup(CONCAT_WS('',name,case_no)) ORDER BY update_time DESC ) num
-         |        FROM (
-         |            SELECT name,card_num,case_no,update_time
-         |            FROM $project.ods_company_dishonest_info
-         |            WHERE ds>'0'
-         |            UNION ALL
-         |            SELECT name,card_num,case_no,update_time
-         |            FROM $project.inc_ods_company_dishonest_info
-         |            WHERE ds>'0'
-         |        )
-         |    )
-         |    WHERE num=1
-         |) C
-         |ON A.name=C.name AND A.case_no=C.case_no
-         |""".stripMargin
-    ).createOrReplaceTempView("tmp_person_cloze_2")
-    //3、再从自身表中根据name和company_name去补全身份证号码
-    sql(
-      s"""
-         |--先从前面结果表获取
-         |INSERT OVERWRITE TABLE winhc_eci_dev.ads_company_zxr_restrict_person_cloze PARTITION(ds=$lastDsIncAds)
-         |SELECT rowkey
-         |  ,flag
-         |  ,new_cid
-         |  ,cid
-         |  ,id
-         |  ,name_hid
-         |  ,name
-         |  ,sex
-         |  ,identity_num
-         |  ,court_name
-         |  ,court_code
-         |  ,case_create_time
-         |  ,case_no
-         |  ,content
-         |  ,oss_path
-         |  ,file_path
-         |  ,province
-         |  ,xgid
-         |  ,company_name
-         |  ,company_info
-         |  ,source
-         |  ,status
-         |  ,appro_time
-         |  ,create_time
-         |  ,update_time
-         |  ,deleted
-         |FROM(
-         |  SELECT A.rowkey
-         |      ,IF(A.identity_num IS NULL AND D.identity_num IS NOT NULL,3,A.flag) AS flag
-         |      ,A.new_cid
-         |      ,A.cid
-         |      ,A.id
-         |      ,A.name_hid
-         |      ,A.name
-         |      ,A.sex
-         |      ,COALESCE(A.identity_num,D.identity_num) AS identity_num
-         |      ,A.court_name
-         |      ,A.court_code
-         |      ,A.case_create_time
-         |      ,A.case_no
-         |      ,A.content
-         |      ,A.oss_path
-         |      ,A.file_path
-         |      ,A.province
-         |      ,A.xgid
-         |      ,A.company_name
-         |      ,A.company_info
-         |      ,A.source
-         |      ,A.status
-         |      ,A.appro_time
-         |      ,A.create_time
-         |      ,A.update_time
-         |      ,A.deleted
-         |      ,ROW_NUMBER() OVER (PARTITION BY A.name,A.case_no ORDER BY A.flag, A.identity_num, A.update_time DESC ) num
-         |  FROM tmp_person_cloze_2 A
-         |  LEFT JOIN
-         |  tmp_person_cloze_2 D
-         |  ON A.name=D.name AND (cleanup(A.company_name)=cleanup(D.company_name) OR (cleanup(A.company_info)=cleanup(D.company_info)))--根据实际数据情况
-         |)
-         |WHERE num=1
-         |""".stripMargin
-    )//.createOrReplaceTempView("tmp_person_cloze_3")
-
     sql(
       s"""
          |insert ${if (isWindows) "INTO" else "OVERWRITE"} table $project.ads_judicial_case_relation_pre partition(ds='$lastDsIncAds',tn='zxr_restrict_person')
@@ -348,7 +155,7 @@ case class JudicialCaseRelationPre456(s: SparkSession, project: String
          |from (
          |      select
          |      md5(cleanup(case_no)) as judicase_id
-         |      ,"6" as flag
+         |      ,"11" as flag
          |      ,concat_ws('',name,'被采取限制消费措施') AS title
          |      ,concat_ws('',case_type(case_no)) as case_type
          |      ,NULL AS case_reason

+ 10 - 0
src/main/scala/com/winhc/bigdata/spark/udf/BaseFunc.scala

@@ -38,6 +38,16 @@ trait BaseFunc {
     spark.udf.register("id_card_trim", id_card_trim _)
   }
 
+  // Registers id_card_trimOrRaw as the Spark SQL UDF "id_card_trimOrRaw": values judged to be ID-card numbers are trimmed to a uniform format, anything else is returned unchanged.
+  def id_card_trimOrRaw_udf(): Unit = {
+    spark.udf.register("id_card_trimOrRaw", id_card_trimOrRaw _)
+  }
+  def id_card_trimOrRaw(str: String): String = {
+    // A non-blank value matching id_card_pattern keeps its first 10 and last 4 characters around "****", upper-cased; any other value (including null) is returned untouched.
+    val looksLikeIdCard = StringUtils.isNotBlank(str) && (id_card_pattern matches str)
+    if (!looksLikeIdCard) str
+    else s"${str.substring(0, 10)}****${str.substring(str.length - 4)}".toUpperCase
+  }
   // Registers the Spark SQL UDF "is_id_card", which reports whether its string argument matches id_card_pattern.
   def is_id_card(): Unit = {
     spark.udf.register("is_id_card", (str: String) => id_card_pattern matches str)
   }