@@ -4,7 +4,7 @@ import com.alibaba.fastjson.{JSON, JSONArray, JSONPath}
import com.winhc.bigdata.spark.config.EsConfig
import com.winhc.bigdata.spark.udf.{BaseFunc, CompanyMapping}
import com.winhc.bigdata.spark.utils.BaseUtil.{isWindows, is_json_str}
-import com.winhc.bigdata.spark.utils.{BaseUtil, CompanyRelationUtils, LoggingUtils, SparkUtils}
+import com.winhc.bigdata.spark.utils.{AsyncExtract, BaseUtil, CompanyRelationUtils, LoggingUtils, SparkUtils}
import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, struct, to_json}
@@ -65,10 +65,10 @@ object args_job {
, explode = "LATERAL VIEW explode( split(concat_ws('\u0001', cg_assignee_id, cg_executed_person_id, gn_executed_person_id, lf_executed_person_id, fz_executed_person_id, executed_person_id),'\u0001') ) key AS key_no"
, keyno = "key_no")

-// , args_job(tableName = "company_lawsuit"
-// , explode = "LATERAL VIEW explode(jsonall_2_array('$.litigant_id', concat_ws('\u0001', defendant_info, plaintiff_info, litigant_info)) ) key AS key_no"
-// , keyno = "key_no"
-// )
+ // , args_job(tableName = "company_lawsuit"
+ // , explode = "LATERAL VIEW explode(jsonall_2_array('$.litigant_id', concat_ws('\u0001', defendant_info, plaintiff_info, litigant_info)) ) key AS key_no"
+ // , keyno = "key_no"
+ // )

, args_job(tableName = "company"
, rowkey = "company_id"
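For context on the explode clause in the hunk above: concat_ws skips NULL id columns, so the LATERAL VIEW yields one key_no row per id that is actually present. A minimal sketch of that fan-out, using ',' in place of the \u0001 separator and made-up ids, assuming a SparkSession named spark:

spark.sql(
  """
    |SELECT id, key_no
    |FROM (SELECT 1 AS id, 'p1' AS cg_assignee_id, CAST(NULL AS STRING) AS executed_person_id, 'p3' AS cg_executed_person_id) t
    |LATERAL VIEW explode(split(concat_ws(',', cg_assignee_id, executed_person_id, cg_executed_person_id), ',')) key AS key_no
    |""".stripMargin).show()
// -> two rows: (1, p1) and (1, p3); the NULL executed_person_id contributes no row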
@@ -198,7 +198,7 @@ case class lookup_tab_pid(s: SparkSession
|FROM $tab_back_deleted
|WHERE ds > $ds
|GROUP BY person_id
- |""".stripMargin).createOrReplaceTempView("mapping")
+ |""".stripMargin).createOrReplaceTempView(s"mapping$tn")

sql(
s"""
@@ -217,18 +217,17 @@ case class lookup_tab_pid(s: SparkSession
| ) AS t1
| ) AS t2
|WHERE t2.num = 1
- |""".stripMargin).createOrReplaceTempView("tab_tmp")
+ |""".stripMargin).createOrReplaceTempView(s"tab_tmp$tn")

sql(
s"""
- |INSERT OVERWRITE TABLE $tar_tab PARTITION(ds='$lastDs',tn='$tn')
+ |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $tar_tab PARTITION(ds='$lastDs',tn='$tn')
|SELECT ${tar_cols.mkString(",")},
- | -- get_table_message(${tar_cols.mkString(",")}, '$tn') message
| ${to_json(tar_cols ++ Seq("tn"))}
- |FROM mapping a
+ |FROM mapping$tn a
|JOIN (
| SELECT *,'$tn' as tn
- | FROM tab_tmp
+ | FROM tab_tmp$tn
| ${args_job.explode}
| ) b
|ON a.person_id = b.$keyno
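On the INSERT verb switch above, a small sketch of the two renderings, presumably so a local Windows test run appends rather than overwrites a partition. The target table and partition values here are hypothetical, and isWindows is a stand-in for BaseUtil.isWindows:

// stand-in for BaseUtil.isWindows (assumption: a simple os.name check)
val isWindows = System.getProperty("os.name").toLowerCase.contains("windows")
val verb = if (isWindows) "INTO" else "OVERWRITE"
// hypothetical target table and partition values, for illustration only
println(s"INSERT $verb TABLE ads_person_lookup PARTITION(ds='20210801',tn='company_zxr') ...")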
@@ -241,11 +240,11 @@ case class lookup_tab_pid(s: SparkSession

}

- private def to_json(seq:Seq[String]): String = {
+ private def to_json(seq: Seq[String]): String = {
val r1 = seq.map(x => {
s"'$x',$x"
}).mkString(",")
- s"to_json(map($r1)) message"
+ s"to_json(map($r1)) message"
}

private def get_partition_order_by(): String = {
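The private to_json helper touched above only renders a SQL projection string; a standalone copy to show its output (column names here are illustrative):

def toJsonExpr(seq: Seq[String]): String = {
  // e.g. Seq("rowkey", "tn") -> "to_json(map('rowkey',rowkey,'tn',tn)) message"
  val r1 = seq.map(x => s"'$x',$x").mkString(",")
  s"to_json(map($r1)) message"
}

println(toJsonExpr(Seq("rowkey", "company_id", "tn")))
// to_json(map('rowkey',rowkey,'company_id',company_id,'tn',tn)) message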
@@ -265,15 +264,24 @@ object lookup_tab_pid {
println("please set project tn.")
sys.exit(-1)
}
- val Array(project, tn) = args
+ val Array(project, tableName) = args
val config = EsConfig.getEsConfigMap ++ mutable.Map(
"spark.hadoop.odps.project.name" -> project,
"spark.debug.maxToStringFields" -> "200",
"spark.hadoop.odps.spark.local.partition.amt" -> "1000"
)
val spark = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
- val re = lookup_tab_pid(s = spark, project = project, args_job.get_args_company_job(tn))
- re.calc()
+ var start = args_job.tab_args
+ if (!tableName.equals("all")) {
+ val set = tableName.split(",").toSet
+ start = start.filter(a => set.contains(a.tableName))
+ }
+ val a = start.map(e => (e.tableName, () => {
+ val re = lookup_tab_pid(s = spark, project = project, args_job.get_args_company_job(e.tableName))
+ re.calc()
+ true
+ }))
+ AsyncExtract.startAndWait(spark, a)
spark.stop()
}
}
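AsyncExtract is an internal winhc utility; from the call site above we only know that startAndWait takes the SparkSession plus a Seq of (tableName, () => Boolean) tasks and returns once they have all run. A minimal stand-in under that assumption, not the real implementation:

import org.apache.spark.sql.SparkSession
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}

object AsyncExtractSketch {
  // spark is kept only to mirror the real call site; this sketch does not use it
  def startAndWait(spark: SparkSession, tasks: Seq[(String, () => Boolean)]): Unit = {
    val futures = tasks.map { case (name, task) =>
      Future {
        val ok = task()                          // run one table's extraction
        println(s"lookup_tab_pid finished for $name: $ok")
      }
    }
    Await.result(Future.sequence(futures), Duration.Inf)  // block until every table is done
  }
}

One point worth flagging in review: the task closures share a single SparkSession, which is exactly why the temp views in the earlier hunks are now suffixed with the table name.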