|
@@ -0,0 +1,67 @@
|
|
|
+package com.winhc.bigdata.spark.jobs
|
|
|
+
|
|
|
+import com.winhc.bigdata.spark.utils.{EsUtils, SparkUtils}
|
|
|
+import org.datanucleus.util.StringUtils
|
|
|
+
|
|
|
/**
 * Spark job that reads `cid, cname, other_id_name, new_cid` from the
 * `company_name_mapping_pro` table, turns every row into a `(cid, CompanyDoc)`
 * pair and writes the pairs to the `winhc-company/company` Elasticsearch
 * resource via `saveToEsWithMeta`.
 *
 * @Author: XuJiakai
 * @Date: 2020/6/5 14:28
 */
object CompanyIndexSave2Es {

  /**
   * Matches every character that is NOT a CJK ideograph in the range
   * U+4E00..U+9FA5, an ASCII digit or an ASCII letter. Used to strip such
   * characters from a company name.
   */
  val pattern: scala.util.matching.Regex = "[^\\u4e00-\\u9fa50-9a-zA-Z]".r

  /**
   * A company name in two forms.
   *
   * @param show  the name exactly as it was read
   * @param value the name with every character matched by [[pattern]] removed
   */
  case class CompanyName(show: String, value: String) extends Serializable

  /**
   * Document body written for one company.
   *
   * @param cname        the company's own name (null when the source name is empty)
   * @param current_id   the `new_cid` of the row when it differs from `cid`, otherwise null
   * @param history_name names taken from `other_id_name`; null when that map is null
   */
  case class CompanyDoc(cname: CompanyName, current_id: String = null, history_name: Seq[CompanyName] = null) extends Serializable

  /**
   * Builds the `(id, document)` pair for a single mapping row.
   *
   * @param cid           company id; returned as the first tuple element. Must not be null.
   * @param cname         company name
   * @param other_id_name map of other company ids to their names; may be null
   * @param new_cid       id this company currently maps to; may be null
   * @return `(cid, doc)` where `doc.history_name` holds one entry per value of
   *         `other_id_name` (skipping the entry keyed by `new_cid` when `new_cid`
   *         is non-null) and `doc.current_id` is `new_cid` unless it equals `cid`.
   *         Entries whose name is null or empty are kept as null elements.
   * @throws NullPointerException if `cid` is null
   */
  def getEsDoc(cid: String, cname: String, other_id_name: scala.collection.Map[String, String], new_cid: String): (String, CompanyDoc) = {
    // A null cid cannot identify a document; fail fast with a descriptive message.
    java.util.Objects.requireNonNull(cid, "cid must not be null")

    val history_name: Seq[CompanyName] =
      if (other_id_name == null) {
        null
      } else {
        // Built eagerly (toList) so the result is a plain, serializable sequence.
        other_id_name.iterator
          .collect { case (id, name) if new_cid == null || id != new_cid => getCompanyName(name) }
          .toList
      }

    // A company that maps to itself has no separate "current" id.
    val current_id: String = if (cid == new_cid) null else new_cid

    (cid, CompanyDoc(getCompanyName(cname), current_id, history_name))
  }

  /**
   * Wraps a raw name into a [[CompanyName]].
   *
   * @param name raw company name
   * @return null when `StringUtils.isEmpty(name)` holds, otherwise the name
   *         paired with its [[pattern]]-stripped form
   */
  private def getCompanyName(name: String): CompanyName =
    if (StringUtils.isEmpty(name)) null
    else CompanyName(name, pattern.replaceAllIn(name, ""))

  /**
   * Job entry point: initialises Spark with the Elasticsearch configuration,
   * converts every row of `company_name_mapping_pro` with [[getEsDoc]] and
   * saves the resulting pairs to `winhc-company/company`.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val map = EsUtils.getEsConfigMap

    val spark = SparkUtils.InitEnv("CompanyIndexSave2Es", map)
    try {
      import org.elasticsearch.spark._
      import spark.implicits._

      spark
        .sql("select cid,cname,other_id_name,new_cid from company_name_mapping_pro")
        .map { r =>
          getEsDoc(r.getString(0), r.getString(1), r.getMap[String, String](2), r.getString(3))
        }
        .rdd
        .saveToEsWithMeta("winhc-company/company")
    } finally {
      // Stop the session even when the query or the Elasticsearch write fails.
      spark.stop()
    }
  }
}
|