|
@@ -303,8 +303,7 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
spark.udf.register("name_aggs", new NameAggs(1000))
|
|
spark.udf.register("name_aggs", new NameAggs(1000))
|
|
spark.udf.register("case_reason", new CaseReasonAggs(1000))
|
|
spark.udf.register("case_reason", new CaseReasonAggs(1000))
|
|
//预处理数据
|
|
//预处理数据
|
|
- val cols = Seq("flag", "date", "detail_id")
|
|
|
|
-
|
|
|
|
|
|
+ val cols = Seq("flag", "date", "detail_id","name")
|
|
val t1 = s"$project.inc_ads_company_court_announcement"
|
|
val t1 = s"$project.inc_ads_company_court_announcement"
|
|
val t2 = s"ads_judicial_case_relation_pre"
|
|
val t2 = s"ads_judicial_case_relation_pre"
|
|
var t2_ds = ds
|
|
var t2_ds = ds
|
|
@@ -325,52 +324,42 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
sql(
|
|
sql(
|
|
s"""
|
|
s"""
|
|
|INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $project.$t3 partition (ds = '$t1_ds')
|
|
|INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $project.$t3 partition (ds = '$t1_ds')
|
|
- |SELECT COALESCE(b.judicase_id,a.new_judicase_id) judicase_id
|
|
|
|
- | ,a.flag
|
|
|
|
- | ,a.title
|
|
|
|
- | ,a.case_type
|
|
|
|
- | ,a.case_reason
|
|
|
|
- | ,case_no_trim(a.case_no) as case_no
|
|
|
|
- | ,a.court_name
|
|
|
|
- | ,a.case_stage
|
|
|
|
- | ,case_label(a.flag) lable
|
|
|
|
- | ,map_2_json(${getStrToMap(cols)}) as detail
|
|
|
|
- | ,a.yg_name
|
|
|
|
- | ,a.bg_name
|
|
|
|
- | ,a.date
|
|
|
|
- | ,a.detail_id
|
|
|
|
- | ,a.case_amt
|
|
|
|
- |FROM (
|
|
|
|
- | select
|
|
|
|
- | judicase_id
|
|
|
|
- | ,flag
|
|
|
|
- | ,title
|
|
|
|
- | ,case_type
|
|
|
|
- | ,case_reason
|
|
|
|
- | ,case_no_trim(case_no) as case_no
|
|
|
|
- | ,court_name
|
|
|
|
- | ,case_stage
|
|
|
|
- | ,replace_char(yg_name) as yg_name
|
|
|
|
- | ,replace_char(bg_name) as bg_name
|
|
|
|
- | ,date
|
|
|
|
- | ,detail_id
|
|
|
|
- | ,case_amt
|
|
|
|
- | ,md5(CLEANUP(case_no_trim(case_no))) as new_judicase_id
|
|
|
|
- | from $project.$t2
|
|
|
|
- | where ds= '$t2_ds' and tn not in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
|
|
|
|
- | and case_no_trim(case_no) is not null
|
|
|
|
- | and date is not null and length(date) = 19
|
|
|
|
- |) a
|
|
|
|
- |LEFT JOIN (
|
|
|
|
- | select case_no_trim(case_no) as case_no,max(judicase_id) judicase_id
|
|
|
|
- | from $project.$t2
|
|
|
|
- | where ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
|
|
|
|
- | and case_no_trim(case_no) is not null
|
|
|
|
- | group by case_no
|
|
|
|
- |) b
|
|
|
|
- |ON CLEANUP(a.case_no) = CLEANUP(b.case_no)
|
|
|
|
- |union all
|
|
|
|
- |SELECT judicase_id
|
|
|
|
|
|
+ |SELECT
|
|
|
|
+ | judicase_id
|
|
|
|
+ | ,flag
|
|
|
|
+ | ,title
|
|
|
|
+ | ,case_type
|
|
|
|
+ | ,case_reason
|
|
|
|
+ | ,case_no
|
|
|
|
+ | ,court_name
|
|
|
|
+ | ,case_stage
|
|
|
|
+ | ,lable
|
|
|
|
+ | ,map_2_json(${getStrToMap(cols)}) as detail
|
|
|
|
+ | ,yg_name
|
|
|
|
+ | ,bg_name
|
|
|
|
+ | ,date
|
|
|
|
+ | ,detail_id
|
|
|
|
+ | ,case_amt
|
|
|
|
+ |FROM
|
|
|
|
+ |(
|
|
|
|
+ | SELECT COALESCE(b.judicase_id,a.new_judicase_id) judicase_id
|
|
|
|
+ | ,a.flag
|
|
|
|
+ | ,a.title
|
|
|
|
+ | ,a.case_type
|
|
|
|
+ | ,a.case_reason
|
|
|
|
+ | ,case_no_trim(a.case_no) as case_no
|
|
|
|
+ | ,a.court_name
|
|
|
|
+ | ,a.case_stage
|
|
|
|
+ | ,case_label(a.flag) lable
|
|
|
|
+ | ,a.yg_name
|
|
|
|
+ | ,a.bg_name
|
|
|
|
+ | ,a.date
|
|
|
|
+ | ,a.detail_id
|
|
|
|
+ | ,a.case_amt
|
|
|
|
+ | ,a.bg_name as name
|
|
|
|
+ | FROM (
|
|
|
|
+ | SELECT
|
|
|
|
+ | judicase_id
|
|
| ,flag
|
|
| ,flag
|
|
| ,title
|
|
| ,title
|
|
| ,case_type
|
|
| ,case_type
|
|
@@ -378,17 +367,46 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
| ,case_no_trim(case_no) as case_no
|
|
| ,case_no_trim(case_no) as case_no
|
|
| ,court_name
|
|
| ,court_name
|
|
| ,case_stage
|
|
| ,case_stage
|
|
- | ,case_label(flag) lable
|
|
|
|
- | ,map_2_json(${getStrToMap(cols)}) as detail
|
|
|
|
| ,replace_char(yg_name) as yg_name
|
|
| ,replace_char(yg_name) as yg_name
|
|
| ,replace_char(bg_name) as bg_name
|
|
| ,replace_char(bg_name) as bg_name
|
|
| ,date
|
|
| ,date
|
|
| ,detail_id
|
|
| ,detail_id
|
|
| ,case_amt
|
|
| ,case_amt
|
|
- |from $project.$t2
|
|
|
|
- |where ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
|
|
|
|
- | and case_no_trim(case_no) is not null
|
|
|
|
- | and date is not null and length(date) = 19
|
|
|
|
|
|
+ | ,md5(CLEANUP(case_no_trim(case_no))) as new_judicase_id
|
|
|
|
+ | FROM $project.$t2
|
|
|
|
+ | WHERE ds= '$t2_ds' and tn not in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
|
|
|
|
+ | and case_no_trim(case_no) is not null
|
|
|
|
+ | and date is not null and length(date) = 19
|
|
|
|
+ | ) a
|
|
|
|
+ | LEFT JOIN (
|
|
|
|
+ | SELECT case_no_trim(case_no) as case_no,max(judicase_id) judicase_id
|
|
|
|
+ | FROM $project.$t2
|
|
|
|
+ | WHERE ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
|
|
|
|
+ | and case_no_trim(case_no) is not null
|
|
|
|
+ | GROUP BY case_no
|
|
|
|
+ | ) b
|
|
|
|
+ | ON CLEANUP(a.case_no) = CLEANUP(b.case_no)
|
|
|
|
+ | UNION ALL
|
|
|
|
+ | SELECT judicase_id
|
|
|
|
+ | ,flag
|
|
|
|
+ | ,title
|
|
|
|
+ | ,case_type
|
|
|
|
+ | ,case_reason
|
|
|
|
+ | ,case_no_trim(case_no) as case_no
|
|
|
|
+ | ,court_name
|
|
|
|
+ | ,case_stage
|
|
|
|
+ | ,case_label(flag) lable
|
|
|
|
+ | ,replace_char(yg_name) as yg_name
|
|
|
|
+ | ,replace_char(bg_name) as bg_name
|
|
|
|
+ | ,date
|
|
|
|
+ | ,detail_id
|
|
|
|
+ | ,case_amt
|
|
|
|
+ | ,replace_char(bg_name) as name
|
|
|
|
+ | FROM $project.$t2
|
|
|
|
+ | WHERE ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
|
|
|
|
+ | and case_no_trim(case_no) is not null
|
|
|
|
+ | and date is not null and length(date) = 19
|
|
|
|
+ |)
|
|
|""".stripMargin).show(10, false)
|
|
|""".stripMargin).show(10, false)
|
|
|
|
|
|
//name 替换 cid
|
|
//name 替换 cid
|
|
@@ -509,13 +527,14 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
|(
|
|
|(
|
|
|SELECT judicase_id
|
|
|SELECT judicase_id
|
|
| ,max(first_title) title
|
|
| ,max(first_title) title
|
|
- | ,max(case_type) case_type
|
|
|
|
|
|
+ | ,concat_ws(',',collect_set(case_type)) case_type
|
|
| ,case_reason(case_reason,date,flag) case_reason
|
|
| ,case_reason(case_reason,date,flag) case_reason
|
|
| ,concat_ws(',',collect_set(case_no)) case_no
|
|
| ,concat_ws(',',collect_set(case_no)) case_no
|
|
| ,concat_ws(',',collect_set(court_name)) court_name
|
|
| ,concat_ws(',',collect_set(court_name)) court_name
|
|
| ,last_stage(concat_ws(' ',collect_set(case_stage))) case_stage
|
|
| ,last_stage(concat_ws(' ',collect_set(case_stage))) case_stage
|
|
- | ,concat_ws(',',max(case_type),collect_set(lable)) lable
|
|
|
|
- | ,concat('[',concat_ws(',',collect_set(detail)),']') detail
|
|
|
|
|
|
+ | ,trim_black(concat_ws(',',max(case_type),collect_set(lable))) lable
|
|
|
|
+ | -- ,concat('[',concat_ws(',',collect_set(detail)),']') detail
|
|
|
|
+ | ,null as detail
|
|
| ,max(case_amt) AS case_amt
|
|
| ,max(case_amt) AS case_amt
|
|
| ,max(date) AS date
|
|
| ,max(date) AS date
|
|
| ,trim_black(concat_ws(',',collect_set(court_level))) court_level
|
|
| ,trim_black(concat_ws(',',collect_set(court_level))) court_level
|
|
@@ -564,12 +583,12 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
|SELECT md5(concat_ws('',judicase_id,CLEANUP(case_no))) id
|
|
|SELECT md5(concat_ws('',judicase_id,CLEANUP(case_no))) id
|
|
| ,judicase_id
|
|
| ,judicase_id
|
|
| ,max(first_title) title
|
|
| ,max(first_title) title
|
|
- | ,max(case_type) case_type
|
|
|
|
|
|
+ | ,case_type(max(case_no)) as case_type
|
|
| ,case_reason(case_reason,date,flag) case_reason
|
|
| ,case_reason(case_reason,date,flag) case_reason
|
|
| ,case_no
|
|
| ,case_no
|
|
| ,max(court_name) court_name
|
|
| ,max(court_name) court_name
|
|
| ,case_stage(max(case_no)) as case_stage
|
|
| ,case_stage(max(case_no)) as case_stage
|
|
- | ,concat_ws(',',max(case_type),collect_set(lable)) lable
|
|
|
|
|
|
+ | ,trim_black(concat_ws(',',max(case_type),collect_set(lable))) lable
|
|
| ,concat('[',concat_ws(',',collect_set(detail)),']') detail
|
|
| ,concat('[',concat_ws(',',collect_set(detail)),']') detail
|
|
| ,max(last_date) last_date
|
|
| ,max(last_date) last_date
|
|
| ,max(deleted) deleted
|
|
| ,max(deleted) deleted
|
|
@@ -582,7 +601,7 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
|
|
| FROM (
|
|
| FROM (
|
|
| SELECT *
|
|
| SELECT *
|
|
| FROM $project.$t6
|
|
| FROM $project.$t6
|
|
- | WHERE ds >= '$second_ds'
|
|
|
|
|
|
+ | WHERE ds >= '$second_ds' AND length(lable) > 0
|
|
| )a JOIN
|
|
| )a JOIN
|
|
| (
|
|
| (
|
|
| select *
|
|
| select *
|