|
@@ -25,6 +25,7 @@ case class IDCard_Completion_Utils(s: SparkSession,
|
|
|
println(s"${this.getClass.getSimpleName} calc start! " + new Date().toString)
|
|
|
|
|
|
prepareFunctions(spark)
|
|
|
+ is_id_card_udf()
|
|
|
|
|
|
//参与补全的表
|
|
|
var mapTables = new mutable.HashMap[String, (String, String, String, String, String, String, String)]()
|
|
@@ -62,7 +63,6 @@ case class IDCard_Completion_Utils(s: SparkSession,
|
|
|
println("not all tables have the same partition of newest !!!")
|
|
|
sys.exit(-1)
|
|
|
}
|
|
|
- is_id_card_udf()
|
|
|
id_card_trim_udf()
|
|
|
lastDsIncOds = minDs
|
|
|
spark.sparkContext.setJobDescription(s"补全身份证号码:${mapTables.size}个表聚合($lastDsIncOds)")
|
|
@@ -70,11 +70,11 @@ case class IDCard_Completion_Utils(s: SparkSession,
|
|
|
s"""
|
|
|
|SELECT ${m._2._2} AS name, ${m._2._3} AS identity_num, ${m._2._4} AS company_name, ${m._2._5} AS case_no, ${m._2._6} AS court_name, ${m._2._7} AS source, ${m._2._7} AS flag
|
|
|
|FROM $project.ods_${m._1}
|
|
|
- |WHERE ds>'0' AND ${m._2._1} IS NULL
|
|
|
+ |WHERE ds>'0' AND is_id_card(${m._2._3}) -- 严格限制必须有符合要求的身份证号码
|
|
|
|UNION ALL
|
|
|
|SELECT ${m._2._2} AS name, ${m._2._3} AS identity_num, ${m._2._4} AS company_name, ${m._2._5} AS case_no, ${m._2._6} AS court_name, ${m._2._7} AS source, ${m._2._7} AS flag
|
|
|
|FROM $project.inc_ods_${m._1}
|
|
|
- |WHERE ds>'0' AND ${m._2._1} IS NULL
|
|
|
+ |WHERE ds>'0' AND is_id_card(${m._2._3}) -- 严格限制必须有符合要求的身份证号码
|
|
|
|""".stripMargin
|
|
|
}).toArray.mkString(" UNION ALL ")
|
|
|
).where("name IS NOT NULL AND case_no IS NOT NULL AND LENGTH(name)>0 AND LENGTH(case_no)>0")
|