Bladeren bron

司法案件字段修改

xufei 4 jaren geleden
bovenliggende
commit
4ea7999f8d

+ 14 - 4
src/main/scala/com/winhc/bigdata/spark/jobs/JudicialCaseRelationPre2.scala

@@ -22,10 +22,13 @@ object JudicialCaseRelationPre2 {
 
     val config = mutable.Map(
       "spark.hadoop.odps.project.name" -> s"$project",
-      "spark.hadoop.odps.spark.local.partition.amt" -> "1000"
+      "spark.hadoop.odps.spark.local.partition.amt" -> "10000"
     )
     val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
-    JudicialCaseRelationPre2(spark, project).calc()
+
+    val r = JudicialCaseRelationPre2(spark, project)
+    //r.precalc()
+    r.calc()
     spark.stop()
   }
 }
@@ -295,6 +298,13 @@ case class JudicialCaseRelationPre2(s: SparkSession, project: String
          |WHERE   r1 IS NULL OR r2 IS NULL
          |""".stripMargin)
 
+    sql(
+      s"""
+         |SELECT  court_name,court_level(court_name) court_level
+         |FROM    $project.tmp_xf_judicial_case_relation_replace_2
+         |WHERE   ds = '$t1_ds'
+         |""".stripMargin).show(200, false)
+
     //司法案件主表
     sql(
       s"""
@@ -312,7 +322,7 @@ case class JudicialCaseRelationPre2(s: SparkSession, project: String
          |        ,max(first_bg_name) AS bg_name
          |        ,max(case_amt) AS case_amt
          |        ,max(date) AS date
-         |        ,court_level(concat_ws(',',collect_set(court_level))) court_level
+         |        ,trim_black(concat_ws(',',collect_set(court_level))) court_level
          |        ,max(deleted) deleted
          |FROM    (
          |        SELECT  a.* ,first_value(yg_name) OVER (PARTITION BY a.judicase_id ORDER BY date ASC ) AS first_yg_name
@@ -330,7 +340,7 @@ case class JudicialCaseRelationPre2(s: SparkSession, project: String
          |                ) b on a.judicase_id = b.judicase_id
          |        )
          |GROUP BY judicase_id
-         |""".stripMargin).show(10, false)
+         |""".stripMargin).show(20, false)
 
     //明细表
     sql(

+ 2 - 2
src/main/scala/com/winhc/bigdata/spark/udf/CompanyMapping.scala

@@ -51,8 +51,8 @@ trait CompanyMapping {
       sortString(s)
     })
 
-    spark.udf.register("court_level", (s: String) => {
-      courtLevel(s)
+    spark.udf.register("trim_black", (s: String) => {
+      trimBlack(s)
     })
   }
 

+ 61 - 3
src/main/scala/com/winhc/bigdata/spark/udf/CourtRank.scala

@@ -1,5 +1,6 @@
 package com.winhc.bigdata.spark.udf
 
+import com.winhc.bigdata.spark.utils.BaseUtil
 import org.apache.commons.lang3.StringUtils
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.sql.SparkSession
@@ -9,7 +10,7 @@ import scala.annotation.meta.getter
 /**
  * @Author: XuJiakai
  * @Date: 2020/9/30 13:57
- * @Description:
+ * @Description:法院等级判断
  */
 trait CourtRank {
   @(transient@getter) protected val spark: SparkSession
@@ -34,14 +35,71 @@ trait CourtRank {
     spark.sparkContext.broadcast(court_name_rank)
   }
 
+  def areaCode(): Broadcast[Map[String, String]] = { // builds and broadcasts a name -> area_code lookup from ods_area_code
+    val areaCode2Name = spark.sparkContext.broadcast(spark.sql( // rows are collected first, then the resulting Map is broadcast
+      s"""
+         |select province,city,area_code
+         |from winhc_eci_dev.ods_area_code where ds = '20200604'
+      """.stripMargin).collect().flatMap(r => {
+      val code = r.getString(2) // column 2 = area_code
+      Seq((r.getString(0), code), (r.getString(1), code)) // one entry keyed by province (column 0), one keyed by city (column 1)
+    }).toMap) // toMap keeps the last value seen for a repeated key
+    areaCode2Name // NOTE(review): despite the name, keys are province/city names and values are area codes
+  }
+
+  //判断法院等级
   def registerCourtRank(): Unit = {
     val court_map = courtRank()
+    val area_code = areaCode() // broadcast province/city name -> area_code lookup, passed to trans below
     spark.udf.register("court_level", (name: String) => {
+      trans(name, court_map, area_code) // the "court_level" UDF body only delegates to trans
+    })
+  }
+
+  def trans(name: String, court_map: Broadcast[Map[String, String]], area_code: Broadcast[Map[String, String]]): String = { // maps a court name to a court-level label; blank name yields ""
+    var r = "" // holds the exact-name lookup result; also the value returned from the catch block
+    try {
       if (StringUtils.isNotBlank(name)) {
-        court_map.value.getOrElse(name, "")
+        r = court_map.value.getOrElse(name, "") // step 1: exact match of the full name in the broadcast court map
+        if (StringUtils.isNotBlank(r)) {
+          return r // plain method return: trans is a def, not the UDF lambda
+        } else {
+          if (name.contains("中级")) return "中级法院" // step 2: keyword fallbacks, checked in this order
+          if (name.contains("高级")) return "高级法院"
+          if (name.contains("最高")) return "最高法院"
+          // step 3: address-based inference, only when the name contains "法院"
+          if (name.contains("法院")) {
+            val (province, city, country) = BaseUtil.parseAddress(name) // third element is parseAddress's district group, bound here as `country`
+            // a district/county part was parsed: return the "基层法院" label directly
+            if (StringUtils.isNotBlank(country)) {
+              return "基层法院"
+            }
+            // only a city part was parsed: the level depends on the area_code lookup
+            if (StringUtils.isNotBlank(city)) {
+              val r1 = area_code.value.getOrElse(city, "") // is the parsed city text a key of the area_code map?
+              if (StringUtils.isNotBlank(r1)) {
+                return "中级法院" // city is a known key
+              } else {
+                return "基层法院" // city text is not a known key
+              }
+            }
+            // only a province part was parsed: return the "高级法院" label directly
+            if (StringUtils.isNotBlank(province)) {
+              return "高级法院"
+            }
+          }
+        }
+        r // nothing matched: r is still the blank lookup result
       } else {
         ""
       }
-    })
+    } catch { // any Exception is printed to stdout and swallowed
+      case e: Exception => {
+        println(name)
+        println(e.getMessage)
+        r // falls back to whatever r held when the exception was thrown
+      }
+    }
+
   }
 }

+ 33 - 14
src/main/scala/com/winhc/bigdata/spark/utils/BaseUtil.scala

@@ -225,7 +225,7 @@ object BaseUtil {
     "其它"
   }
 
-  def courtLevel(s: String): String = {
+  def trimBlack(s: String): String = {
     var r = ""
     if (StringUtils.isNotBlank(s)) {
       r = s.split(",").filter(StringUtils.isNotBlank(_)).mkString(",")
@@ -237,19 +237,19 @@ object BaseUtil {
     var r = ""
     if (StringUtils.isNotBlank(s)) {
       r = s match {
-        case "0"  => "裁判文书"//企业
-        case "1"  => "开庭公告"//企业
-        case "2"  => "法院公告"//企业
-        case "3"  => "失信人"//企业
-        case "4"  => "送达公告"//企业
-        case "5"  => "限高"//企业
-        case "6"  => "终本"//企业
-        case "7"  => "被执行人"//企业
-        case "8"  => "立案信息"//企业
-        case "9"  => "失信人"//人
-        case "10" => "被执行人"//人
-        case "11" => "限高"//人
-        case "12" => "终本"//人
+        case "0" => "裁判文书" //企业
+        case "1" => "开庭公告" //企业
+        case "2" => "法院公告" //企业
+        case "3" => "失信人" //企业
+        case "4" => "送达公告" //企业
+        case "5" => "限高" //企业
+        case "6" => "终本" //企业
+        case "7" => "被执行人" //企业
+        case "8" => "立案信息" //企业
+        case "9" => "失信人" //人
+        case "10" => "被执行人" //人
+        case "11" => "限高" //人
+        case "12" => "终本" //人
         case _ => ""
       }
     }
@@ -317,7 +317,26 @@ object BaseUtil {
     null
   }
 
+  val pat = "(?<province>[^省]+省|.+自治区)?(?<city>[^自治州]+自治州|[^市]+市|[^盟]+盟|[^地区]+地区|.+区划)?(?<district>[^市]+市|[^县]+县|[^旗]+旗|.+区)?".r // named groups province/city/district, all optional. NOTE(review): an empty match is therefore possible, and [^自治州] / [^地区] are negated character classes (each listed character is excluded individually), not negated words - confirm this is intended
+
+  /**
+   * Extracts the province, city and district parts of a string using the named groups of `pat`.
+   * @param addr text to match against `pat`
+   * @return (province, city, district); a group that did not take part in the match is null
+   */
+  def parseAddress(addr: String): (String, String, String) = {
+      val matcher = pat.pattern.matcher(addr) // uses the underlying java.util.regex.Pattern to read named groups
+      if (!matcher.find()) {
+        return null // NOTE(review): a null result cannot be destructured as a tuple by callers such as CourtRank.trans
+      }
+      val province = matcher.group("province") // each group(...) is null when that optional group did not match
+      val city = matcher.group("city")
+      val district = matcher.group("district")
+      (province, city, district)
+  }
+
   def main(args: Array[String]): Unit = {
+    println(parseAddress("大石桥市人民法院"))
     println(case_no_trim("(2015)怀执字第03601号号"))
     val seq = Seq("1", "3", "2", "7").mkString("\001")
     println(sortString(seq))