|
@@ -27,7 +27,7 @@ object JudicialCaseRelationPreNew {
|
|
println("please check project ds c!")
|
|
println("please check project ds c!")
|
|
sys.exit(-1)
|
|
sys.exit(-1)
|
|
}
|
|
}
|
|
- if(ds.equals("all")) ds =""
|
|
|
|
|
|
+ if (ds.equals("all")) ds = ""
|
|
println(
|
|
println(
|
|
s"""
|
|
s"""
|
|
|project: $project
|
|
|project: $project
|
|
@@ -65,6 +65,7 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
}
|
|
}
|
|
|
|
|
|
def precalc(): Unit = {
|
|
def precalc(): Unit = {
|
|
|
|
+ case_no_trim_udf()
|
|
prepareFunctions(spark)
|
|
prepareFunctions(spark)
|
|
val t1 = s"$project.inc_ads_company_court_announcement"
|
|
val t1 = s"$project.inc_ads_company_court_announcement"
|
|
var t1_ds = ds
|
|
var t1_ds = ds
|
|
@@ -155,12 +156,12 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
| FROM $project.ads_judicial_case_relation_graph
|
|
| FROM $project.ads_judicial_case_relation_graph
|
|
| ) a
|
|
| ) a
|
|
|RIGHT JOIN (
|
|
|RIGHT JOIN (
|
|
- | SELECT *,md5(cleanup(case_no)) as new_judicase_id
|
|
|
|
|
|
+ | SELECT *,md5(cleanup(case_no_trim(case_no))) as new_judicase_id
|
|
| FROM
|
|
| FROM
|
|
| (
|
|
| (
|
|
| SELECT *,row_number() over(partition by docid order by judge_date desc) num
|
|
| SELECT *,row_number() over(partition by docid order by judge_date desc) num
|
|
- | FROM $project.ods_wenshu_detail
|
|
|
|
- | WHERE ds > '0'
|
|
|
|
|
|
+ | FROM winhc_eci.ods_wenshu_detail
|
|
|
|
+ | WHERE ds > '0' AND case_no_trim(case_no) is not null
|
|
| )c
|
|
| )c
|
|
| where num = 1
|
|
| where num = 1
|
|
| ) b
|
|
| ) b
|
|
@@ -309,9 +310,13 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
t2_ds = BaseUtil.getPartion(t2, "wenshu", spark)
|
|
t2_ds = BaseUtil.getPartion(t2, "wenshu", spark)
|
|
t1_ds = BaseUtil.getPartion(t1, spark)
|
|
t1_ds = BaseUtil.getPartion(t1, spark)
|
|
}
|
|
}
|
|
- //司法案件id交换表
|
|
|
|
- val t3 = "ads_judicial_case_relation_replace"
|
|
|
|
|
|
+
|
|
|
|
+ val t3 = "ads_judicial_case_relation_replace" //司法案件id交换表
|
|
val t4 = "ads_judicial_case_incr_mapping"
|
|
val t4 = "ads_judicial_case_incr_mapping"
|
|
|
|
+ val t5 = s"base_company_mapping" //公司name和cid映射
|
|
|
|
+    val t6 = s"ads_judicial_case_relation_replace_cids" // rows of t3 with the matched company cids attached (extra `cids` column)
|
|
|
|
+
|
|
|
|
+ val t5_ds = BaseUtil.getPartion(t5, spark) //映射表分区
|
|
|
|
|
|
//替换司法案件id
|
|
//替换司法案件id
|
|
sql(
|
|
sql(
|
|
@@ -347,14 +352,14 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
| ,date
|
|
| ,date
|
|
| ,detail_id
|
|
| ,detail_id
|
|
| ,case_amt
|
|
| ,case_amt
|
|
- | ,md5(CLEANUP(case_no)) as new_judicase_id
|
|
|
|
- | from $project.ads_judicial_case_relation_pre
|
|
|
|
|
|
+ | ,md5(CLEANUP(case_no_trim(case_no))) as new_judicase_id
|
|
|
|
+ | from $project.$t2
|
|
| where ds= '$t2_ds' and tn <> 'wenshu' and case_no_trim(case_no) is not null
|
|
| where ds= '$t2_ds' and tn <> 'wenshu' and case_no_trim(case_no) is not null
|
|
| and date is not null and length(date) = 19
|
|
| and date is not null and length(date) = 19
|
|
|) a
|
|
|) a
|
|
|LEFT JOIN (
|
|
|LEFT JOIN (
|
|
| select case_no_trim(case_no) as case_no,max(judicase_id) judicase_id
|
|
| select case_no_trim(case_no) as case_no,max(judicase_id) judicase_id
|
|
- | from $project.ads_judicial_case_relation_pre
|
|
|
|
|
|
+ | from $project.$t2
|
|
| where ds = '$t2_ds' and tn ='wenshu' and case_no_trim(case_no) is not null
|
|
| where ds = '$t2_ds' and tn ='wenshu' and case_no_trim(case_no) is not null
|
|
| group by case_no
|
|
| group by case_no
|
|
|) b
|
|
|) b
|
|
@@ -375,14 +380,79 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
| ,date
|
|
| ,date
|
|
| ,detail_id
|
|
| ,detail_id
|
|
| ,case_amt
|
|
| ,case_amt
|
|
- |from $project.ads_judicial_case_relation_pre
|
|
|
|
|
|
+ |from $project.$t2
|
|
|where ds = '$t2_ds' and tn ='wenshu' and case_no_trim(case_no) is not null
|
|
|where ds = '$t2_ds' and tn ='wenshu' and case_no_trim(case_no) is not null
|
|
| and date is not null and length(date) = 19
|
|
| and date is not null and length(date) = 19
|
|
|""".stripMargin).show(10, false)
|
|
|""".stripMargin).show(10, false)
|
|
|
|
|
|
- val second_ds = getSecondLastPartitionOrElse(t3, "0")
|
|
|
|
- println(s"calc ds: $t2_ds, par ds : $t1_ds, second_ds : $second_ds")
|
|
|
|
|
|
+    // Match the names in yg_name/bg_name against company names in t5 and write the t3 rows plus their cids into t6
|
|
|
|
+ sql(
|
|
|
|
+ s"""
|
|
|
|
+ |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $project.$t6 partition (ds = '$t1_ds')
|
|
|
|
+ |SELECT
|
|
|
|
+ | a.judicase_id
|
|
|
|
+ | ,flag
|
|
|
|
+ | ,title
|
|
|
|
+ | ,case_type
|
|
|
|
+ | ,case_reason
|
|
|
|
+ | ,case_no
|
|
|
|
+ | ,court_name
|
|
|
|
+ | ,case_stage
|
|
|
|
+ | ,lable
|
|
|
|
+ | ,detail
|
|
|
|
+ | ,yg_name
|
|
|
|
+ | ,bg_name
|
|
|
|
+ | ,DATE
|
|
|
|
+ | ,detail_id
|
|
|
|
+ | ,case_amt
|
|
|
|
+ | ,coalesce(b.cids,'') AS cids
|
|
|
|
+ |FROM (
|
|
|
|
+ | SELECT *
|
|
|
|
+ | FROM $project.$t3
|
|
|
|
+ | WHERE ds = '$t1_ds'
|
|
|
|
+ | ) a
|
|
|
|
+ |LEFT JOIN (
|
|
|
|
+ | SELECT
|
|
|
|
+ | judicase_id
|
|
|
|
+ | ,sort(concat_ws(',',collect_set(cid)),',') cids
|
|
|
|
+ | FROM (
|
|
|
|
+ | SELECT
|
|
|
|
+ | e.judicase_id
|
|
|
|
+ | ,f.new_cid cid
|
|
|
|
+ | FROM (
|
|
|
|
+ | SELECT *
|
|
|
|
+ | FROM (
|
|
|
|
+ | SELECT
|
|
|
|
+ | yg_name AS names
|
|
|
|
+ | ,judicase_id
|
|
|
|
+ | FROM $project.$t3
|
|
|
|
+ | WHERE ds = '$t1_ds' AND length(cleanup(yg_name)) >4
|
|
|
|
+ | UNION ALL
|
|
|
|
+ | SELECT
|
|
|
|
+ | bg_name AS names
|
|
|
|
+ | ,judicase_id
|
|
|
|
+ | FROM $project.$t3
|
|
|
|
+ | WHERE ds = '$t1_ds' AND length(cleanup(bg_name)) >4
|
|
|
|
+ | ) a
|
|
|
|
+ | LATERAL VIEW explode(split(names,',')) t AS name
|
|
|
|
+ | ) e
|
|
|
|
+ | JOIN (
|
|
|
|
+ | SELECT
|
|
|
|
+ | cname
|
|
|
|
+ | ,max(new_cid) AS new_cid
|
|
|
|
+ | FROM $project.$t5
|
|
|
|
+ | WHERE ds = '$t5_ds' AND length(cleanup(cname)) >4
|
|
|
|
+ | GROUP BY cname
|
|
|
|
+ | ) f
|
|
|
|
+ | ON cleanup(e.name) = cleanup(f.cname)
|
|
|
|
+ | )
|
|
|
|
+ | GROUP BY judicase_id
|
|
|
|
+ | ) b
|
|
|
|
+ |ON a.judicase_id = b.judicase_id
|
|
|
|
+ |""".stripMargin)
|
|
|
|
|
|
|
|
+ val second_ds = getSecondLastPartitionOrElse(t6, "0")
|
|
|
|
+ println(s"calc ds: $t2_ds, par ds : $t1_ds, second_ds : $second_ds")
|
|
|
|
|
|
//找出增量数据
|
|
//找出增量数据
|
|
sql(
|
|
sql(
|
|
@@ -392,15 +462,15 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
| ,CASE WHEN a.judicase_id IS NULL THEN 1 ELSE 0 END
|
|
| ,CASE WHEN a.judicase_id IS NULL THEN 1 ELSE 0 END
|
|
|FROM (
|
|
|FROM (
|
|
| SELECT judicase_id
|
|
| SELECT judicase_id
|
|
- | ,md5(concat_ws('',judicase_id, sort(concat_ws('\001',collect_set(case_no))))) r1
|
|
|
|
- | FROM $project.$t3
|
|
|
|
|
|
+ | ,md5(concat_ws('',judicase_id, sort(concat_ws(',',collect_set(case_no)),','), sort(concat_ws(',',collect_set(cids)),','))) r1
|
|
|
|
+ | FROM $project.$t6
|
|
| WHERE ds = '$t1_ds'
|
|
| WHERE ds = '$t1_ds'
|
|
| GROUP BY judicase_id
|
|
| GROUP BY judicase_id
|
|
| ) a
|
|
| ) a
|
|
|FULL JOIN (
|
|
|FULL JOIN (
|
|
| SELECT judicase_id
|
|
| SELECT judicase_id
|
|
- | ,md5(concat_ws('',judicase_id, sort(concat_ws('\001',collect_set(case_no))))) r2
|
|
|
|
- | FROM $project.$t3
|
|
|
|
|
|
+ | ,md5(concat_ws('',judicase_id, sort(concat_ws(',',collect_set(case_no)),','), sort(concat_ws(',',collect_set(cids)),','))) r2
|
|
|
|
+ | FROM $project.$t6
|
|
| WHERE ds = '$second_ds'
|
|
| WHERE ds = '$second_ds'
|
|
| GROUP BY judicase_id
|
|
| GROUP BY judicase_id
|
|
| ) b
|
|
| ) b
|
|
@@ -427,6 +497,7 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
| ,max(date) AS date
|
|
| ,max(date) AS date
|
|
| ,trim_black(concat_ws(',',collect_set(court_level))) court_level
|
|
| ,trim_black(concat_ws(',',collect_set(court_level))) court_level
|
|
| ,max(deleted) deleted
|
|
| ,max(deleted) deleted
|
|
|
|
+ | ,concat_ws(',',collect_set(cids)) cids
|
|
|FROM (
|
|
|FROM (
|
|
| SELECT a.* ,first_value(yg_name) OVER (PARTITION BY a.judicase_id ORDER BY date ASC ) AS first_yg_name
|
|
| SELECT a.* ,first_value(yg_name) OVER (PARTITION BY a.judicase_id ORDER BY date ASC ) AS first_yg_name
|
|
| ,first_value(bg_name) OVER (PARTITION BY a.judicase_id ORDER BY date ASC ) AS first_bg_name
|
|
| ,first_value(bg_name) OVER (PARTITION BY a.judicase_id ORDER BY date ASC ) AS first_bg_name
|
|
@@ -434,7 +505,7 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
| ,b.deleted
|
|
| ,b.deleted
|
|
| FROM (
|
|
| FROM (
|
|
| SELECT *,court_level(court_name) court_level
|
|
| SELECT *,court_level(court_name) court_level
|
|
- | FROM $project.$t3
|
|
|
|
|
|
+ | FROM $project.$t6
|
|
| WHERE ds >= '$second_ds'
|
|
| WHERE ds >= '$second_ds'
|
|
| ) a JOIN
|
|
| ) a JOIN
|
|
| (
|
|
| (
|
|
@@ -471,7 +542,7 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
| ,b.deleted
|
|
| ,b.deleted
|
|
| FROM (
|
|
| FROM (
|
|
| SELECT *
|
|
| SELECT *
|
|
- | FROM $project.$t3
|
|
|
|
|
|
+ | FROM $project.$t6
|
|
| WHERE ds >= '$second_ds'
|
|
| WHERE ds >= '$second_ds'
|
|
| )a JOIN
|
|
| )a JOIN
|
|
| (
|
|
| (
|