فهرست منبع

Merge remote-tracking branch 'origin/master'

许家凯 4 سال پیش
والد
کامیت
271d164b14
1 فایل تغییر یافته به همراه 4 افزوده شده و 3 حذف شده
  1. 4 3
      src/main/scala/com/winhc/bigdata/spark/utils/IDCard_Completion_Utils.scala

+ 4 - 3
src/main/scala/com/winhc/bigdata/spark/utils/IDCard_Completion_Utils.scala

@@ -31,7 +31,7 @@ case class IDCard_Completion_Utils(s: SparkSession,
     mapTables("company_zxr") = ("cids", "cname", "card", "null", "case_no", "court", "1")
     mapTables("company_dishonest_info") = ("cid", "name", "card_num", "null", "case_no", "court", "2")
     mapTables("company_zxr_final_case") = ("cid", "name", "identity_num", "null", "case_no", "court_name", "3")
-    mapTables("company_zxr_restrict") = ("cid", "name", "identity_num", "coalesce(company_name,company_info)", "court_name", "case_no", "4")
+    mapTables("company_zxr_restrict") = ("cid", "name", "identity_num", "coalesce(company_name,company_info)", "case_no", "court_name", "4")
     var lastDsIncOds: String = ""
     var minDs: String = ""
     var maxDs: String = ""
@@ -62,6 +62,7 @@ case class IDCard_Completion_Utils(s: SparkSession,
       println("not all tables have the same partition of newest !!!")
       sys.exit(-1)
     }
+    is_id_card()
     id_card_trimOrRaw_udf()
     lastDsIncOds = minDs
     spark.sparkContext.setJobDescription(s"补全身份证号码:${mapTables.size}个表聚合($lastDsIncOds)")
@@ -71,7 +72,7 @@ case class IDCard_Completion_Utils(s: SparkSession,
          |FROM $project.ods_${m._1}
          |WHERE ds>'0' AND ${m._2._1} IS NULL
          |UNION ALL
-         |SELECT ${m._2._1} AS name, ${m._2._2} AS identity_num, ${m._2._3} AS company_name, ${m._2._4} AS case_no, ${m._2._5} AS court_name, ${m._2._6} AS source, ${m._2._6} AS flag
+         |SELECT ${m._2._2} AS name, ${m._2._3} AS identity_num, ${m._2._4} AS company_name, ${m._2._5} AS case_no, ${m._2._6} AS court_name, ${m._2._7} AS source, ${m._2._7} AS flag
          |FROM $project.inc_ods_${m._1}
          |WHERE ds>'0' AND ${m._2._1} IS NULL
          |""".stripMargin
@@ -116,7 +117,7 @@ case class IDCard_Completion_Utils(s: SparkSession,
          |    ON A.name=B.name AND A.company_name=B.company_name
          |  )
          |)
-         |WHERE num=1
+         |WHERE num=1 AND is_id_card(identity_num)--必须是规范的身份证号码
          |""".stripMargin
     )
     //    CompanyIncSummary(spark, project, tableName, "new_cid", dupliCols).calc