Browse Source

feat: 省市区code转name增强版

许家凯 3 years ago
parent
commit
8558767deb

+ 49 - 0
src/main/scala/com/winhc/bigdata/spark/implicits/AreaCode2NameHelper.scala

@@ -0,0 +1,49 @@
package com.winhc.bigdata.spark.implicits

import com.winhc.bigdata.spark.utils.AreaCode2Name
import org.apache.commons.lang3.StringUtils

/**
 * Enriches a province-level lookup tree (as built by
 * AreaCode2Name.getAreaCodeTree) with methods that resolve area codes
 * to (province, city, district) names.
 *
 * @author: XuJiakai
 * @date: 2021/5/28 10:33
 */
object AreaCode2NameHelper {

  implicit class AreaCode2NameEnhancer(map: Map[String, AreaCode2Name]) extends Serializable {

    /**
     * Resolves the three 2-digit segment codes to their names.
     * Resolution stops at the first level that cannot be found; the
     * remaining positions are returned as null so callers can tell a
     * partial hit from a full one.
     *
     * @return (province_name, city_name, district_name), each possibly null
     */
    def get_area_name(province_code: String, city_code: String, county_code: String): (String, String, String) = {
      if (StringUtils.isEmpty(province_code)) {
        (null, null, null)
      } else {
        map.get(province_code) match {
          case None => (null, null, null)
          case Some(province) =>
            // sub_content may be null on leaf nodes; guard with Option
            Option(province.sub_content).flatMap(_.get(city_code)) match {
              case None => (province.name, null, null)
              case Some(city) =>
                Option(city.sub_content).flatMap(_.get(county_code)) match {
                  case None           => (province.name, city.name, null)
                  case Some(district) => (province.name, city.name, district.name)
                }
            }
        }
      }
    }

    /**
     * Resolves a full 6-digit area code (e.g. "110101") by splitting it
     * into its 2-digit province / city / district segments.
     *
     * @return (province_name, city_name, district_name), each possibly null
     */
    def get_area_name(area_code: String): (String, String, String) = {
      if (StringUtils.isEmpty(area_code) || area_code.length != 6) {
        (null, null, null)
      } else {
        // call the sibling overload directly — no need to re-apply the implicit
        get_area_name(area_code.substring(0, 2), area_code.substring(2, 4), area_code.substring(4, 6))
      }
    }
  }
}

+ 6 - 0
src/main/scala/com/winhc/bigdata/spark/implicits/BaseHelper.scala

@@ -34,5 +34,11 @@ object BaseHelper {
     def mkStringOrNull(): String = {
       if (arr.isEmpty) null else arr.mkString(",")
     }
+
+    def mkStringOrNull(sep: String): String = arr.mkStringOrNull("", sep, "")
+
+    def mkStringOrNull(start: String, sep: String, end: String): String = {
+      if (arr.isEmpty) null else arr.mkString(start, sep, end)
+    }
   }
 }

+ 56 - 3
src/main/scala/com/winhc/bigdata/spark/udf/BaseFunc.scala

@@ -2,9 +2,10 @@ package com.winhc.bigdata.spark.udf
 
 import com.alibaba.fastjson.{JSON, JSONArray, JSONPath}
 import com.winhc.bigdata.spark.implicits.CompanyIndexSave2EsHelper
+import com.winhc.bigdata.spark.implicits.AreaCode2NameHelper._
 import com.winhc.bigdata.spark.utils.BaseUtil._
-import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils}
-import org.apache.commons.lang3.StringUtils
+import com.winhc.bigdata.spark.utils.{AreaCode2Name, BaseUtil, LoggingUtils, area_code_org}
+import org.apache.commons.lang3.{StringEscapeUtils, StringUtils}
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.sql.SparkSession
 import org.json4s._
@@ -29,6 +30,35 @@ trait BaseFunc extends LoggingUtils {
      })
    }*/
 
+
+  def json_parse_udf(): Unit = {
+    /**
+     *
+     * @param json_array
+     * @param json_path "$.name"
+     * @return
+     */
+    def json_2_array(json_array: String, json_path: String): Seq[String] = {
+      try {
+        if (StringUtils.isEmpty(json_array)) {
+          return Seq.empty
+        }
+        if (!is_json_str(json_array)) {
+          return Seq.empty
+        }
+        JSONPath.eval(JSON.parse(json_array), json_path).asInstanceOf[JSONArray].toArray[String](Array()).toSeq.distinct.diff(Seq(""))
+      } catch {
+        case e: Exception => {
+          println(json_array)
+          Seq.empty
+        }
+      }
+    }
+
+    spark.udf.register("json_2_array", json_2_array _)
+  }
+
+
   def addEmptyPartitionOrSkip(tab: String, ds: String): Unit = {
     sql(
       s"""
@@ -81,6 +111,30 @@ trait BaseFunc extends LoggingUtils {
     spark.udf.register("json_2_array", json_2_array _)
   }
 
+
+  def areaCode2Name_pro(): Unit = {
+    val area_code_df = spark.sql(
+      s"""
+         |select area_code,province,city,district
+         |from winhc_eci_dev.ods_area_code where ds = '20200604'
+      """.stripMargin).collect()
+
+    val array = area_code_df.map(f => area_code_org(f.getString(0), f.getString(1), f.getString(2), f.getString(3)))
+    val map = AreaCode2Name.getAreaCodeTree(array)
+    val broad_map = spark.sparkContext.broadcast(map)
+
+    spark.udf.register("get_area_name_pro", (areaCode: String) => {
+      val t = broad_map.value.get_area_name(areaCode)
+      Seq(t._1, t._2, t._3)
+    })
+
+    spark.udf.register("get_area_name_pro_detail", (provinceCode: String, cityCode: String, countyCode: String) => {
+      val t = broad_map.value.get_area_name(provinceCode, cityCode, countyCode)
+      Seq(t._1, t._2, t._3)
+    })
+  }
+
+
   def code2Name(): (Broadcast[Map[String, Seq[String]]], Broadcast[Map[String, Seq[String]]]) = {
     val categoryCode2Name = spark.sparkContext.broadcast(spark.sql(
       s"""
@@ -139,7 +193,6 @@ trait BaseFunc extends LoggingUtils {
   def unescapeHtml4(): Unit = {
     //清理html字符
     spark.udf.register("unescapeHtml4", (col: String) => {
-      import org.apache.commons.lang3.StringEscapeUtils
       StringEscapeUtils.unescapeHtml4(col)
     })
   }

+ 39 - 0
src/main/scala/com/winhc/bigdata/spark/utils/AreaCode2Name.scala

@@ -0,0 +1,39 @@
package com.winhc.bigdata.spark.utils

/**
 * Node of the province -> city -> district lookup tree.
 *
 * Equality and hashing intentionally consider only (code, rank) so that
 * nodes can be de-duplicated per tree level regardless of name or children.
 *
 * @param code        2-digit segment code at this level
 * @param name        human-readable area name
 * @param rank        tree depth: 1 = province, 2 = city, 3 = district
 * @param sub_content child nodes keyed by their 2-digit code; null on leaves
 * @author: XuJiakai
 * @date: 2021/5/28 10:15
 */
case class AreaCode2Name(code: String, name: String, rank: Int, sub_content: Map[String, AreaCode2Name]) extends Comparable[AreaCode2Name] {
  override def hashCode(): Int = s"$code@$rank".hashCode()

  override def equals(obj: Any): Boolean =
    obj match {
      case d: AreaCode2Name =>
        s"$code@$rank".equals(s"${d.code}@${d.rank}")
      case _ =>
        false
    }

  override def compareTo(o: AreaCode2Name): Int = code.compareTo(o.code)
}

/** Raw row of ods_area_code: 6-digit area_code plus its three name columns. */
case class area_code_org(area_code: String, province: String, city: String, district: String)

object AreaCode2Name {

  /**
   * Builds the 3-level lookup tree from flat area-code rows.
   * Keys at every level are the 2-digit code segment for that level
   * (chars 0-2 = province, 2-4 = city, 4-6 = district).
   */
  def getAreaCodeTree(seq: Seq[area_code_org]): Map[String, AreaCode2Name] = {
    seq.groupBy(_.area_code.substring(0, 2)).map { case (provinceCode, provinceRows) =>
      val cities = provinceRows.groupBy(_.area_code.substring(2, 4)).map { case (cityCode, cityRows) =>
        // district level: leaves carry no children (null sub_content)
        val districts = cityRows
          .map(r => r.area_code.substring(4, 6) -> AreaCode2Name(r.area_code.substring(4, 6), r.district, 3, null))
          .toMap
        // groups are never empty, so head is safe
        cityCode -> AreaCode2Name(cityCode, cityRows.head.city, 2, districts)
      }
      provinceCode -> AreaCode2Name(provinceCode, provinceRows.head.province, 1, cities)
    }
  }
}