|
@@ -0,0 +1,161 @@
|
|
|
+package com.winhc.bigdata.spark.jobs
|
|
|
+
|
|
|
+import com.winhc.bigdata.spark.udf.CompanyMapping
|
|
|
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
|
|
|
+import org.apache.commons.lang3.StringUtils
|
|
|
+import org.apache.spark.sql.SparkSession
|
|
|
+
|
|
|
+import scala.collection.mutable
|
|
|
+
|
|
|
+
|
|
|
/**
 * Incremental job that adds the company id (cid) to court-judgment ("wenshu") records.
 *
 * @author π
 * @date 2020/8/17
 */
object CompanyWenshuDetailCombine {

  /**
   * Job entry point: builds the Spark session, runs the incremental computation once
   * and always stops the session afterwards.
   *
   * @param args command-line arguments; not read by this job
   */
  def main(args: Array[String]): Unit = {
    val project = "winhc_eci_dev"
    val tableName = "inc_ods_wenshu_detail_combine"
    println(
      s"""
         |project: $project
         |tableName: $tableName
         |""".stripMargin)

    val config = mutable.Map(
      // Same value as before; reuse `project` instead of repeating the literal.
      "spark.hadoop.odps.project.name" -> project,
      "spark.hadoop.odps.spark.local.partition.amt" -> "100"
    )
    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
    try {
      CompanyWenshuDetailCombine(spark, project, tableName).calc
    } finally {
      // Stop the session even when the computation throws, so the driver does not
      // keep Spark resources alive after a failed run.
      spark.stop()
    }
  }

}
|
|
|
/**
 * Explodes the plaintiff / defendant company names of newly arrived judgment records,
 * joins them to the company mapping table on the cleaned-up company name and writes the
 * result (with `new_cid` as `cid`) into `winhc_eci_dev.inc_ods_wenshu_detail_combine`.
 *
 * NOTE(review): `project` and `tableName` are accepted but not read by `calc`; every table
 * name below is hard-coded. Confirm whether they are meant to drive the target table.
 *
 * @param s         Spark session used for all queries
 * @param project   name of the project the table lives in
 * @param tableName table name (without prefix/suffix)
 */
case class CompanyWenshuDetailCombine(s: SparkSession,
                                      project: String, // name of the project the table lives in
                                      tableName: String // table name (without prefix/suffix)
                                     ) extends LoggingUtils with CompanyMapping {
  override protected val spark: SparkSession = s

  /**
   * Runs one incremental load.
   *
   * Source rows with `ds > v1` are processed and written with INSERT OVERWRITE into
   * partition `ds = v2` of the output table. When there is nothing new to process
   * (or a required partition cannot be resolved) the write is skipped, because an
   * overwrite with an empty result would wipe an already-loaded partition.
   *
   * @return the result of the INSERT statement, or an empty DataFrame when the run is
   *         skipped (assumes `sql` returns a DataFrame — TODO confirm against LoggingUtils)
   */
  def calc = {
    // Presumably registers the CLEANUP UDF used in the SQL below — confirm in CompanyMapping.
    prepareFunctions(spark)

    // Latest partition of the output table (records that already carry a cid).
    val latestCombined = BaseUtil.getPartion("winhc_eci_dev.inc_ods_wenshu_detail_combine", spark)
    // First run: no output partition yet, so fall back to the fixed starting watermark.
    val v1 = if (StringUtils.isBlank(latestCombined)) "20200604" else latestCombined
    // Latest partition of the source table (records still lacking a cid).
    val v2 = BaseUtil.getPartion("winhc_eci.ods_wenshu_detail", spark)
    // Latest partition of the cid mapping table.
    val mapDs = BaseUtil.getPartion("winhc_eci_dev.base_company_mapping", spark)

    println(
      s"""
         |v1:$v1
         |v2:$v2
         |mapDs:$mapDs
         |""".stripMargin)

    // Guard against destructive no-op runs:
    //  - v2 <= v1: the filter `ds > v1` selects nothing, so INSERT OVERWRITE would replace
    //    the existing ds=v2 output partition with an empty one;
    //  - blank v2 / mapDs: the statement would write an empty or invalid partition and
    //    advance the watermark past data that was never joined.
    // Partition values are compared as strings, exactly like `ds > '$v1'` in the SQL.
    val nothingToLoad = StringUtils.isBlank(v2) || StringUtils.isBlank(mapDs) || v2 <= v1

    if (nothingToLoad) {
      println(s"skip: no new source partition to load (v1=$v1, v2=$v2, mapDs=$mapDs)")
      spark.emptyDataFrame
    } else {
      sql(
        s"""
           |INSERT OVERWRITE TABLE winhc_eci_dev.inc_ods_wenshu_detail_combine PARTITION(ds='$v2')
           |SELECT  name_type
           |        ,e.cname
           |        ,f.new_cid AS cid
           |        ,case_id
           |        ,uuid
           |        ,docid
           |        ,case_no
           |        ,doc_type
           |        ,case_type
           |        ,case_reason_level2
           |        ,case_reason_level3
           |        ,case_reason_level4
           |        ,case_reason
           |        ,case_reason_levelnum
           |        ,case_stage
           |        ,case_amt
           |        ,party_info
           |        ,court_name
           |        ,court_province
           |        ,court_city
           |        ,court_level
           |        ,yg_info
           |        ,yg_type
           |        ,yg_name
           |        ,yg_wtdlr
           |        ,yg_faren
           |        ,yg_lawyer
           |        ,bg_info
           |        ,bg_type
           |        ,bg_name
           |        ,bg_wtdlr
           |        ,bg_faren
           |        ,bg_lawyer
           |        ,third_party
           |        ,danbao
           |        ,fact
           |        ,court_view
           |        ,judge
           |        ,clerk
           |        ,judge_date_cn
           |        ,judge_date
           |        ,judge_year
           |        ,judge_result
           |        ,is_success
           |        ,url
           |        ,head
           |        ,title
           |        ,legal_basis
           |        ,keywords
           |        ,plaintiffs
           |        ,defendants
           |        ,crawl_date
           |        ,update_date
           |        ,sample_type
           |        ,judge_main
           |FROM    (
           |            SELECT  *
           |            FROM    (
           |                        SELECT  *
           |                                ,ROW_NUMBER() OVER (PARTITION BY case_no,cname,name_type ORDER BY update_date DESC) num
           |                        FROM    (
           |                                    SELECT  *
           |                                    FROM    (
           |                                                SELECT  "y" AS name_type
           |                                                        ,*
           |                                                FROM    winhc_eci.ods_wenshu_detail
           |                                                LATERAL VIEW explode(split(yg_name, '\n')) tmpTable AS cname
           |                                                WHERE   ds > '$v1'
           |                                                AND     yg_type = '企业'
           |                                            ) c
           |                                    UNION ALL
           |                                    SELECT  *
           |                                    FROM    (
           |                                                SELECT  "b" AS name_type
           |                                                        ,*
           |                                                FROM    winhc_eci.ods_wenshu_detail
           |                                                LATERAL VIEW explode(split(bg_name, '\n')) tmpTable AS cname
           |                                                WHERE   ds > '$v1'
           |                                                AND     bg_type = '企业'
           |                                            ) d
           |                                ) e
           |                    ) x
           |            WHERE   num = 1
           |        ) e
           |JOIN    (
           |            SELECT  *
           |            FROM    (
           |                        SELECT  *
           |                                ,ROW_NUMBER() OVER(PARTITION BY CLEANUP(cname) ORDER BY update_time DESC) num
           |                        FROM    winhc_eci_dev.base_company_mapping
           |                        WHERE   ds = '$mapDs'
           |                    ) k
           |            WHERE   num = 1
           |            AND     length(CLEANUP(cname)) > 4
           |        ) f
           |ON      CLEANUP(e.cname) = CLEANUP(f.cname)
           |""".stripMargin)
    }
  }
}
|