Browse Source

feat: 添加法院层级广播变量函数

许家凯 4 years ago
parent
commit
789de46a5b

+ 35 - 0
src/main/scala/com/winhc/bigdata/spark/udf/CourtRank.scala

@@ -0,0 +1,35 @@
+package com.winhc.bigdata.spark.udf
+
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.sql.SparkSession
+
+import scala.annotation.meta.getter
+
+/**
+ * Mixin that builds a court-name to court-rank lookup table from the
+ * `winhc_eci_dev.court_rank` table and publishes it as a Spark broadcast variable.
+ *
+ * The implementing class must supply the [[SparkSession]] used to run the query.
+ *
+ * @author XuJiakai
+ * @note Created 2020/9/30 13:57
+ */
+trait CourtRank {
+  // Supplied by the implementing class; the @transient annotation is targeted at the generated getter.
+  @(transient@getter) protected val spark: SparkSession
+
+  /**
+   * Reads the `name` and `court_rank` columns of `winhc_eci_dev.court_rank`, collects
+   * them to the driver, expands alias names into separate keys and broadcasts the result.
+   *
+   * Two entries for the Supreme People's Court are always added and take precedence over
+   * any row with the same name coming from the table.
+   *
+   * @return a broadcast map from court name (or alias) to court rank
+   */
+  def courtRank(): Broadcast[Map[String, String]] = {
+    val ranksFromTable: Map[String, String] = spark.sql(
+      """
+        |select * from winhc_eci_dev.court_rank
+        |""".stripMargin)
+      // Only these two columns are read below, so do not collect the rest to the driver.
+      .select("name", "court_rank")
+      .collect()
+      .flatMap(row => expandAliases(row.getAs[String]("name"), row.getAs[String]("court_rank")))
+      .toMap
+    // Fixed entries are appended last so that they override rows from the table.
+    val supremeCourt = Map("中华人民共和国最高人民法院" -> "最高法院", "最高人民法院" -> "最高法院")
+    spark.sparkContext.broadcast(ranksFromTable ++ supremeCourt)
+  }
+
+  /**
+   * Turns one table row into one or more (name, rank) pairs.
+   *
+   * A name containing the alias marker is stripped of brackets and split on the marker,
+   * so every alias maps to the same rank. Any other name is returned unchanged.
+   *
+   * @param name court name as stored in the table; may be null
+   * @param rank court rank as stored in the table
+   * @return the lookup entries for this row; empty when `name` is null
+   */
+  private def expandAliases(name: String, rank: String): Seq[(String, String)] = {
+    val aliasMarker = "又名"
+    if (name == null) {
+      // A row without a name cannot be used as a lookup key; skip it instead of failing.
+      Seq.empty
+    } else if (name.contains(aliasMarker)) {
+      // Strip ASCII brackets and full-width brackets (U+FF08 / U+FF09) before splitting.
+      name.replaceAll("[()\uFF08\uFF09]", "")
+        .split(aliasMarker)
+        // A name that starts with the marker yields an empty first fragment; drop it.
+        .filter(_.nonEmpty)
+        .map(alias => (alias, rank))
+        .toSeq
+    } else {
+      Seq((name, rank))
+    }
+  }
+}

+ 3 - 2
src/main/scala/com/winhc/bigdata/spark/utils/BaseUtil.scala

@@ -249,14 +249,15 @@ object BaseUtil {
     case_no = case_no.replace(" ", "");
     if (case_no.length < 8) return null
     case_no = year_pat.replaceAllIn(case_no, "\\($1\\)")
-
+    case_no = case_no.replace("(", "(")
+      .replace(")", ")")
     if (case_pat matches case_no) {
       case_pat.replaceAllIn(case_no, "$1")
     } else null
   }
 
   def main(args: Array[String]): Unit = {
-    println(case_no_trim("2015年怀执字第03601号号"))
+    println(case_no_trim("(2015)怀执字第03601号号"))
   }
 
 }