Explorar o código

feat: 索引util

许家凯 hai 4 anos
pai
achega
49eac7a9d1

+ 68 - 0
src/main/scala/com/winhc/bigdata/spark/utils/CompanyIndexUtils.scala

@@ -0,0 +1,68 @@
+package com.winhc.bigdata.spark.utils
+
+import com.winhc.bigdata.spark.utils.BaseUtil.cleanup
+import org.apache.commons.lang3.StringUtils
+
+/**
+ * @author: XuJiakai
+ * @date: 2020/11/23 10:46
+ */
+
+/**
+ * Plain id/name pair; instances are created by `CompanyIndexUtils.getHuman`.
+ *
+ * @param id   identifier value
+ * @param name name value
+ */
+case class human(
+  id: String,
+  name: String
+)
+
+/**
+ * A company name in two forms, as built by `CompanyIndexUtils.getCompanyName`.
+ *
+ * @param show  the name with every "\t;" sequence removed and surrounding whitespace trimmed
+ * @param value the name after being passed through `BaseUtil.cleanup`
+ */
+case class CompanyName(
+  show: String,
+  value: String
+)
+
+
+/**
+ * Helpers that turn raw company fields into index-friendly values.
+ *
+ * Several methods return `null` instead of an empty value; that contract is kept
+ * unchanged so existing callers continue to work.
+ */
+object CompanyIndexUtils {
+  /** Divisor applied to the registered capital before the logarithm is taken. */
+  private val CapitalScale = 10000000L
+
+  /** Separator between the individual names inside a multi-name string. */
+  private val NameSeparator = "\t;\t"
+
+  /** Wraps an id and a name into a [[human]]. */
+  def getHuman(id: String, name: String): human = human(id, name)
+
+  /**
+   * Builds the display/search pair for a single company name.
+   *
+   * @param name raw company name
+   * @return `null` when `name` is null or empty; otherwise a [[CompanyName]] whose `show`
+   *         is `name` with every "\t;" removed and trimmed, and whose `value` is
+   *         `cleanup(name)`
+   */
+  def getCompanyName(name: String): CompanyName =
+    if (StringUtils.isEmpty(name)) null
+    else CompanyName(name.replaceAll("\t;", "").trim, cleanup(name))
+
+  /**
+   * Converts a "\t;\t"-separated list of names into [[CompanyName]] entries, leaving out
+   * the current name.
+   *
+   * @param cname current company name to exclude; may be null, in which case nothing is excluded
+   * @param names "\t;\t"-separated names
+   * @return the converted names, or `null` when no name remains
+   */
+  def getHistoryName(cname: String, names: String): Seq[CompanyName] = {
+    // getSplit already discards blank entries and duplicates (and yields an empty Seq for
+    // null/blank input). `!=` is null-safe, unlike the former `cname.equals(_)`.
+    val history = getSplit(names).filter(_ != cname).map(getCompanyName)
+    if (history.isEmpty) null else history
+  }
+
+  /**
+   * Computes a ranking weight for a company and returns it as a string.
+   *
+   * Rules, applied in order:
+   *  - `cname` is null, or its cleaned-up form without digits has exactly 1 character: "0.01"
+   *  - the cleaned-up form without digits has at most 3 characters: "0.3"
+   *  - `reg_status` is null or contains "销" or "消": "1"
+   *  - otherwise `ln(capital / 10000000 + 1) + 1`, plus 3 when `company_type` is "1"
+   *
+   * @param reg_status         registration status text
+   * @param cname              company name
+   * @param reg_capital_amount registered capital as an integer string; null, blank or
+   *                           unparsable values count as 0, negative values are clamped to 0
+   * @param company_type       company type code; only the value "1" affects the result
+   * @return the weight rendered as a string
+   */
+  def company_score_weight(reg_status: String, cname: String, reg_capital_amount: String, company_type: String): String = {
+    if (cname == null) "0.01"
+    else {
+      val significantLength = cleanup(cname).replaceAll("[0-9]", "").length
+      // NOTE(review): a length of 0 (e.g. an all-digit name) falls through to "0.3",
+      // which is higher than the "0.01" given to length 1 - kept as in the original, confirm intent.
+      if (significantLength == 1) "0.01"
+      else if (significantLength <= 3) "0.3"
+      else if (reg_status == null || reg_status.contains("销") || reg_status.contains("消")) "1"
+      else {
+        val amount = Option(reg_capital_amount)
+          .flatMap(raw => scala.util.Try(raw.trim.toLong).toOption)
+          .getOrElse(0L)
+        // Clamp at 0 so the logarithm never receives a non-positive argument
+        // (which would render as "NaN" or "-Infinity").
+        // The division is integer division, as in the original: capital below
+        // 10000000 contributes nothing.
+        val base = Math.log(math.max(amount, 0L) / CapitalScale + 1) + 1
+        val bonus = if ("1".equals(company_type)) 3 else 0
+        (base + bonus).toString
+      }
+    }
+  }
+
+  /**
+   * Splits a "\t;\t"-separated string into its distinct, non-blank parts.
+   *
+   * @param str the joined string; may be null or blank
+   * @return the distinct non-blank parts in order of first occurrence, or an empty Seq
+   *         when `str` is null or blank
+   */
+  def getSplit(str: String): Seq[String] =
+    if (StringUtils.isNotBlank(str)) {
+      // `distinct` removes duplicates like the former `toSet` did, but keeps a stable order.
+      str.split(NameSeparator).filter(StringUtils.isNotBlank).distinct.toList
+    } else {
+      Seq.empty[String]
+    }
+}