xufei 4 years ago
parent
commit
d09e884851

+ 80 - 56
src/main/scala/com/winhc/bigdata/spark/jobs/CompanyCourtAnnouncement.scala

@@ -137,14 +137,14 @@ case class CompanyCourtAnnouncement(s: SparkSession, project: String, //表所
 
     sql(
       s"""
-         |SELECT  d.*,bg_cid
+         |SELECT  d.*,bg_cid,bg_city_name
          |FROM    announcement d
          |JOIN    (
-         |            SELECT  bg_name,bg_cid
+         |            SELECT  bg_name,bg_cid,bg_city_name
          |            FROM    $ads_eci_debtor_relation
          |            WHERE   ds = $debtorRelationDs
          |            AND     deleted = 0
-         |            group by bg_name,bg_cid
+         |            group by bg_name,bg_cid,bg_city_name
          |        ) e
          |ON      cleanup(d.plaintiff_name) = cleanup(e.bg_name)
          |""".stripMargin).map(r => {
@@ -258,11 +258,14 @@ case class CompanyCourtAnnouncement(s: SparkSession, project: String, //表所
        val plaintiff = r.getAs[String]("plaintiff") //plaintiff
        val litigant = r.getAs[String]("litigant") //litigant
        val litigant_name = r.getAs[String]("litigant_name") //defendant company
-        val label: String = Json(DefaultFormats).write(CourtAnnouncement(r, Seq("announcement_type", "publish_date"))) //label list
        val business_id = r.getAs[String]("rowkey") //business primary key id
        val business_type = "8" //dynamic type
        val business_type_name = "0" //dynamic type name
-        val m1: Map[String, String] = queryCompany(restClient, litigant_name)
+        val m1: Map[String, String] = EsQuery.queryCompany(restClient, litigant_name)
+        //label list
+        val label: String = Json(DefaultFormats).write(
+          CourtAnnouncement(r, Seq("announcement_type", "publish_date")) ++ Map("city_name" -> m1.getOrElse("city_name", ""))
+        )
         //dynamic change content
         val m2: Map[String, String] = CourtAnnouncement(r, Seq("plaintiff",
           "litigant", "announcement_type", "court_name", "publish_date", "content"))
@@ -294,7 +297,10 @@ case class CompanyCourtAnnouncement(s: SparkSession, project: String, //表所
     val litigant = r.getAs[String]("litigant") //litigant
     val plaintiff_name = r.getAs[String]("plaintiff_name") //plaintiff company
     val plaintiff_cid = r.getAs[String]("bg_cid") //plaintiff company cid
-    val label: String = Json(DefaultFormats).write(CourtAnnouncement(r, Seq("announcement_type", "publish_date"))) //label list
+    val city_name = r.getAs[String]("bg_city_name") //plaintiff company city
+    val label: String = Json(DefaultFormats).write(
+      CourtAnnouncement(r, Seq("announcement_type", "publish_date")) ++ Map("city_name" -> city_name)
+    ) //label list
     val business_id = r.getAs[String]("rowkey") //business primary key id
     val business_type = "7" //dynamic type
     val business_type_name = "0" //dynamic type name
@@ -451,12 +457,64 @@ case class CompanyCourtAnnouncement(s: SparkSession, project: String, //表所
     true
   }
 
+
+
+}
+
+object CompanyCourtAnnouncement {
+  def main(args: Array[String]): Unit = {
+    var project = ""
+    var table = ""
+    var runOld = false
+
+    if (args.length == 2) {
+      val Array(project1, table1) = args
+      project = project1
+      table = table1
+    } else if (args.length == 3) {
+      val Array(project1, table1, remain) = args
+      project = project1
+      table = table1
+      if (remain.equals("1"))
+        runOld = true
+    } else {
+      println("please set project,table...")
+      sys.exit(-1)
+    }
+
+    println(
+      s"""
+         |project: $project| table: $table| runOld: $runOld
+         |""".stripMargin)
+
+    val config = mutable.Map(
+      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "100"
+    )
+    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
+
+    val announcement = CompanyCourtAnnouncement(spark, project, table)
+    announcement.regfun()
+    //whether to run the full (historical) dataset
+    if (!runOld) {
+      val flag = announcement.preCalc()
+      //return if the incremental data has no updates
+      if (!flag) return
+    }
+    announcement.calc(runOld)
+    spark.stop()
+  }
+
+}
+
+object EsQuery {
   def queryCompany(restClient: RestClient, companyName: String) = {
     val query =
       s"""
          |{
-         |  "_source": {
-         |     "includes": [ "_id","province_code", "city_code","county_code","reg_capital","estiblish_time","phones"]
+         |   "_source": {
+         |     "includes": [ "_id","province_code", "city_code","county_code","reg_capital","estiblish_time","phones",
+         |     "province_name","city_name","county_name","category_first","category_second","category_third"]
          |   },
          |  "query": {
          |    "term": {
@@ -493,6 +551,13 @@ case class CompanyCourtAnnouncement(s: SparkSession, project: String, //表所
       val estiblish_time = source.get("estiblish_time").asInstanceOf[String]
       val phones = source.get("phones").asInstanceOf[util.List[String]].asScala.mkString(",")
 
+      val province_name = source.get("province_name").asInstanceOf[String]
+      val city_name = source.get("city_name").asInstanceOf[String]
+      val county_name = source.get("county_name").asInstanceOf[String]
+      val category_first = source.get("category_first").asInstanceOf[String]
+      val category_second = source.get("category_second").asInstanceOf[String]
+      val category_third = source.get("category_third").asInstanceOf[String]
+
       Map(
         "id" -> id,
         "province_code" -> province_code,
@@ -500,7 +565,13 @@ case class CompanyCourtAnnouncement(s: SparkSession, project: String, //表所
         "county_code" -> county_code,
         "reg_capital" -> reg_capital,
         "estiblish_time" -> estiblish_time,
-        "phones" -> phones
+        "phones" -> phones,
+        "province_name" -> province_name,
+        "city_name" -> city_name,
+        "county_name" -> county_name,
+        "category_first" -> category_first,
+        "category_second" -> category_second,
+        "category_third" -> category_third
       )
     } else {
       Map.empty[String, String]
@@ -511,53 +582,6 @@ case class CompanyCourtAnnouncement(s: SparkSession, project: String, //表所
     import scala.collection.JavaConverters._
     JSON.parseObject(json).getJSONObject("hits").getJSONArray("hits").toArray().map(m => m.asInstanceOf[util.Map[String, Any]]).map(_.asScala).toList
   }
-
-}
-
-object CompanyCourtAnnouncement {
-  def main(args: Array[String]): Unit = {
-    var project = ""
-    var table = ""
-    var runOld = false
-
-    if (args.length == 2) {
-      val Array(project1, table1) = args
-      project = project1
-      table = table1
-    } else if (args.length == 3) {
-      val Array(project1, table1, remain) = args
-      project = project1
-      table = table1
-      if (remain.equals("1"))
-        runOld = true
-    } else {
-      println("please set project,table...")
-      sys.exit(-1)
-    }
-
-    println(
-      s"""
-         |project: $project| table: $table| runOld: $runOld
-         |""".stripMargin)
-
-    val config = mutable.Map(
-      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
-      "spark.hadoop.odps.spark.local.partition.amt" -> "100"
-    )
-    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
-
-    val announcement = CompanyCourtAnnouncement(spark, project, table)
-    announcement.regfun()
-    //whether to run the full (historical) dataset
-    if (!runOld) {
-      val flag = announcement.preCalc()
-      //return if the incremental data has no updates
-      if(!flag) return
-    }
-    announcement.calc(runOld)
-    spark.stop()
-  }
-
 }
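
Note on the new EsQuery helper: queryCompany returns Map.empty when ES has no hit for the company name, hence the getOrElse guard on city_name above. A minimal usage sketch, assuming this commit's EsQuery and the project's EsRestUtils.getRestClient are on the classpath (the company name is made up):

    import com.winhc.bigdata.spark.jobs.EsQuery
    import com.winhc.bigdata.spark.utils.EsRestUtils
    import org.elasticsearch.client.RestClient
    import org.json4s.DefaultFormats
    import org.json4s.jackson.Json

    object EsQueryUsageSketch {
      def main(args: Array[String]): Unit = {
        val restClient: RestClient = EsRestUtils.getRestClient()
        try {
          val m1: Map[String, String] = EsQuery.queryCompany(restClient, "某某科技有限公司")
          //fall back to "" when the company is not indexed
          val label: String = Json(DefaultFormats).write(Map("city_name" -> m1.getOrElse("city_name", "")))
          println(label)
        } finally {
          restClient.close()
        }
      }
    }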
 
 

+ 192 - 81
src/main/scala/com/winhc/bigdata/spark/model/CompanyBidScore.scala

@@ -1,12 +1,21 @@
 package com.winhc.bigdata.spark.model
 
-import java.util.Date
-
-import com.winhc.bigdata.calc.{DimScore, DimScoreV2}
-import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
+import java.util
+import java.util.{Collections, Date}
+
+import com.alibaba.fastjson.JSON
+import com.winhc.bigdata.calc.DimScoreV2
+import com.winhc.bigdata.spark.jobs.EsQuery
+import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
+import com.winhc.bigdata.spark.utils.EsRestUtils.getRestClient
+import com.winhc.bigdata.spark.utils.{BaseUtil, EsRestUtils, LoggingUtils, Maxcomputer2Hbase, SparkUtils}
 import org.apache.commons.lang3.StringUtils
-import org.apache.spark.broadcast.Broadcast
+import org.apache.http.entity.ContentType
+import org.apache.http.nio.entity.NStringEntity
+import org.apache.http.util.EntityUtils
+import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{Row, SparkSession}
+import org.elasticsearch.client.RestClient
 
 import scala.annotation.meta.getter
 import scala.collection.mutable
@@ -19,91 +28,79 @@ import scala.collection.mutable
 object CompanyBidScore {
 
   val tabMapping: Map[String, (String, String, String, String)] =
-    Map("ads_company_bid_list" -> ("1", "publish_time", "资产权益", "招投标") //招投标
+    Map("company_bid_list" -> ("306", "publish_time", "资产权益", "招投标") //招投标
     )
 
   def main(args: Array[String]): Unit = {
 
-    val (sourceTable, flag, time, kind, project) = valid(args)
+    val (namespace, sourceTable, flag, time, kind, project) = valid(args)
 
-    var config = mutable.Map.empty[String, String]
+    val config = mutable.Map(
+      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "100"
+    )
 
     val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
 
     println(s"company ${this.getClass.getSimpleName} calc start! " + new Date().toString)
+        spark.sql(
+          """
+            |select "24416401" as new_cid,1111L as id,'2020-07-18' as publish_time
+            |""".stripMargin).createOrReplaceTempView("inc_view")
+
+    new CompanyBidScore(spark, sourceTable, "inc_view", flag, time, kind, project, "1", namespace).calc()
+//    new CompanyBidScore(spark, sourceTable, "", flag, time, kind, project, "0", namespace).calc()
 
-    new CompanyBidScore(spark, sourceTable, flag, time, kind, project).calc()
 
     println(s"company ${this.getClass.getSimpleName} calc end! " + new Date().toString)
     spark.stop()
   }
 
   def valid(args: Array[String]) = {
-    if (args.length != 1) {
-      println("请输入要计算的table!!!! ")
+    println(args.mkString(", "))
+    if (args.length != 2) {
+      println("please enter namespace, table!!!! ")
       sys.exit(-1)
     }
-    val Array(sourceTable) = args
+    val Array(namespace, sourceTable) = args
 
     val (flag, time, kind, project) = tabMapping.getOrElse(sourceTable, ("", "", "", ""))
     if (flag.isEmpty || time.isEmpty || kind.isEmpty || project.isEmpty) {
-      println("输入表不存在!!!   ")
+      println("table not found!!!   ")
       sys.exit(-1)
     }
-    (sourceTable, flag, time, kind, project)
+    (namespace, sourceTable, flag, time, kind, project)
   }
 }
 
-case class CompanyBidScore(s: SparkSession, sourceTable: String,
-                           flag: String, time: String, kind: String, project: String
-                          ) extends LoggingUtils {
+case class CompanyBidScore(s: SparkSession, sourceTable: String, tableView: String,
+                           flag: String, time: String, kind: String, project: String, tp: String = "0", namespace: String
+                          ) extends LoggingUtils with Logging {
 
   @(transient@getter) val spark: SparkSession = s
 
   import spark.implicits._
 
-  def calc(): Unit = {
-
-    val ods_company = "new_ods_company"
-    val company_category = "const_company_category_code"
-
-    //    //industry
-    //    val code2Name: Broadcast[Map[String, String]] = spark.sparkContext.broadcast(sql(
-    //      s"""
-    //         |select category_code,category_str_big
-    //         |from $company_category
-    //      """.stripMargin).collect().map(r => {
-    //      (r.getString(0), r.getString(1))
-    //    }).toMap)
-    //
-    //    spark.udf.register("industry_name", (code: String) => {
-    //      code2Name.value.getOrElse(code, null)
-    //    })
-    //
-    //    val industry = sql(
-    //      s"""
-    //         |select category_code,cast(cid as string) as ncid,
-    //         |       industry_name(category_code) AS industry_name
-    //         |from $ods_company where cid is not null
-    //         |""".stripMargin)
-    //
-    //    industry.show(100)
-    //
-    //
-    //    industry.createOrReplaceTempView("t1")
-
-    val industry2 = sql(
-      s"""
-         |select a.category_code,cast(a.cid as string) as ncid,
-         |       b.category_str_big AS industry_name
-         |from $ods_company  a
-         |left join const_company_category_code b on a.category_code = b.category_code
-         |where cid is not null
-         |""".stripMargin)
-    industry2.createOrReplaceTempView("t1")
+  def calc(): Unit = {
+
+    val ads_company = s"$namespace.ads_company"
+    val company_category = s"$namespace.const_company_category_code"
+    val ads_company_tb = s"$namespace.ads_$sourceTable"
+    val inc_ads_company_tb = s"$namespace.inc_ads_$sourceTable"
 
-    //    note: check whether the online table is partitioned
-    //     ds = '${BaseUtil.getPartion(sourceTable, spark)}' AND
+    val adsCompanyPar = BaseUtil.getPartion(ads_company, spark)
+//    val adsPar = BaseUtil.getPartion(ads_company_tb, spark)
+
+    var ds = ""
+    var appsql2 = ""
+    var tb = ads_company_tb
+    if ("1".equals(tp)) {
+      tb = tableView
+      ds = BaseUtil.getPartion(inc_ads_company_tb, spark)
+    } else {
+      ds = BaseUtil.getPartion(ads_company_tb, spark)
+      appsql2 = s"AND  ds = ${ds}"
+    }
 
     val df = sql(
       s"""
@@ -111,45 +108,100 @@ case class CompanyBidScore(s: SparkSession, sourceTable: String,
          |FROM    (
          |        SELECT
          |                *
-         |                ,COUNT(ncid) OVER(PARTITION BY ncid ) AS cnt1
-         |                ,row_number() OVER(PARTITION BY ncid ORDER BY $time DESC ) AS num
-         |        FROM    $sourceTable
-         |        WHERE
-         |
-         |             ncid IS NOT NULL
+         |                ,COUNT(new_cid) OVER(PARTITION BY new_cid ) AS cnt1
+         |                ,ROW_NUMBER() OVER(PARTITION BY new_cid ORDER BY $time DESC ) AS num
+         |        FROM    $tb
+         |        WHERE new_cid IS NOT NULL
+         |        ${appsql2}
          |        ) a
          |WHERE   num =1
-         |""".stripMargin).createOrReplaceTempView("t2")
-    //      .join(industry, Seq("ncid"), "left")
-    //      .select("cid", "id", "cnt1", "industry_name", "ncid")
+         |""".stripMargin)
 
-    val df2 = sql(
-      """
-        |select t2.*,t1.industry_name,category_code from t2 left join t1 on t2.ncid = t1.ncid
-        |""".stripMargin)
-    df2.show(100)
+    df.createOrReplaceTempView("t2")
+
+    if (tp.equals("0")) {
+      sql(
+        s"""
+           |select a.category_code,cast(a.cid as string) as new_cid,
+           |       b.category_str_big AS industry_name
+           |from $ads_company  a
+           |left join $company_category b on a.category_code = b.category_code
+           |where a.cid is not null and a.ds=${adsCompanyPar}
+           |""".stripMargin).createOrReplaceTempView("t1")
+
+      sql(
+        """
+          |select t2.*,t1.industry_name,category_code from t2 left join t1 on t2.new_cid = t1.new_cid
+          |""".stripMargin).map(r => {
+        trans(r, flag, kind, project)
+      }).toDF("id", "cid", "kind", "kind_code", "project", "project_code", "type",
+        "score", "total", "extraScore")
+        .createOrReplaceTempView(s"tmp_view")
 
-    df2.map(r => {
-      trans(r, flag, kind, project)
-    }).toDF("id", "cid", "kind", "kind_code", "project", "project_code", "type",
-      "score", "total", "extraScore")
-      .createOrReplaceTempView(s"${sourceTable}_tmp_view")
+    } else {
+      df.mapPartitions(iter => {
+        trans2(iter, flag, kind, project)
+      }).toDF("id", "cid", "kind", "kind_code", "project", "project_code", "type",
+        "score", "total", "extraScore")
+        .createOrReplaceTempView(s"tmp_view")
+    }
 
+    sql(
+      s"""
+         |insert ${if (isWindows) "INTO" else "OVERWRITE"} table ${ads_company_tb}_score partition(ds=$ds)
+         |select id,cid,kind,kind_code,project,project_code,type,score,total,extraScore
+         |from tmp_view
+         |""".stripMargin)
 
-    sql(s"select * from ${sourceTable}_tmp_view").show(10)
-    sql(s"insert overwrite table ${sourceTable}_score  select * from ${sourceTable}_tmp_view")
+    //sync to hbase
+    if ("1".equals(tp)) { //the full (stock) run does not need the hbase sync
+      val dataFrame = sql(
+        s"""
+           |select
+           |CONCAT_WS('_',cid,project_code) AS rowkey,
+           |id,cid,kind,kind_code,project,project_code,type,score,total,extraScore
+           |from tmp_view
+           |""".stripMargin)
+      Maxcomputer2Hbase(dataFrame, "COMPANY_SCORE").syn()
+    }
   }
 
+  //full-data (stock) logic
   def trans(r: Row, flag: String, kind: String, prpject: String) = {
     val id = r.getAs[Long]("id")
-    val cid = r.getAs[Long]("ncid").toString
+    val cid = r.getAs[String]("new_cid")
     val cnt1 = r.getAs[Long]("cnt1")
     val industry_name = r.getAs[String]("industry_name")
     flag match {
-      case "1" => tenderScore(id, cid, cnt1, kind, prpject, industry_name)
+      case "306" => tenderScore(id, cid, cnt1, kind, prpject, industry_name)
     }
   }
 
+  //incremental logic
+  def trans2(iter: Iterator[Row], flag: String, kind: String, prpject: String) = {
+    val restClient = getRestClient()
+    val df = iter.map(r => {
+      try {
+        val id = r.getAs[Long]("id")
+        val cid = r.getAs[String]("new_cid")
+        val cnt1 = r.getAs[Long]("cnt1")
+        val m1: Map[String, String] = EsQuery2.queryCompanyForCid(restClient, cid)
+        val industry_name = m1("category_first")
+        flag match {
+          case "306" => tenderScore(id, cid, cnt1, kind, prpject, industry_name)
+        }
+      } catch {
+        case e: Exception => {
+          logWarning(r.toString())
+          logError(e.getMessage, e)
+          null
+        }
+      }
+    })
+    df
+  }
+
   //bidding
   def tenderScore(id: Long, cid: String, cnt1: Long, kind: String, project: String, industry_name: String) = {
     var score = 0f
@@ -196,3 +248,62 @@ case class CompanyBidScore(s: SparkSession, sourceTable: String,
   }
 
 }
+
+object EsQuery2 {
+
+  def main(args: Array[String]): Unit = {
+    val client = EsRestUtils.getRestClient()
+    val map = queryCompanyForCid(client, "23537076")
+    println(map)
+  }
+
+  def queryCompanyForCid(restClient: RestClient, cid: String) = {
+    val query = ""
+    val entity = new NStringEntity(query, ContentType.APPLICATION_JSON)
+
+    val indexResponse = restClient.performRequest(
+      "GET",
+      s"/winhc-company/company/_search/?q=_id:$cid",
+      Collections.emptyMap[String, String](),
+      entity)
+    val en = indexResponse.getEntity
+    val res = EntityUtils.toString(en)
+    import scala.collection.JavaConverters._
+    val list = getIndexResult2(res)
+    if (list.nonEmpty) {
+      val id = list.head("_id").asInstanceOf[String]
+      val source: util.Map[String, Any] = list.head("_source").asInstanceOf[util.Map[String, Any]]
+      val province_code = source.get("province_code").asInstanceOf[String]
+      val city_code = source.get("city_code").asInstanceOf[String]
+      val county_code = source.get("county_code").asInstanceOf[String]
+      val reg_capital = source.get("reg_capital").asInstanceOf[String]
+      val category_first = source.get("category_first").asInstanceOf[String]
+      val category_second = source.get("category_second").asInstanceOf[String]
+      val category_third = source.get("category_third").asInstanceOf[String]
+      val estiblish_time = source.get("estiblish_time").asInstanceOf[String]
+      val phones = source.get("phones").asInstanceOf[util.List[String]].asScala.mkString(",")
+
+      Map(
+        "id" -> id,
+        "province_code" -> province_code,
+        "city_code" -> city_code,
+        "county_code" -> county_code,
+        "reg_capital" -> reg_capital,
+        "estiblish_time" -> estiblish_time,
+        "phones" -> phones,
+        "category_first" -> category_first,
+        "category_second" -> category_second,
+        "category_third" -> category_third
+      )
+    } else {
+      Map.empty[String, String]
+    }
+  }
+
+  def getIndexResult2(json: String) = {
+    import scala.collection.JavaConverters._
+    JSON.parseObject(json).getJSONObject("hits").getJSONArray("hits").toArray().map(m => m.asInstanceOf[util.Map[String, Any]]).map(_.asScala).toList
+  }
+}
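
A sketch of the pattern trans2 relies on: RestClient is not serializable, so it has to be created inside mapPartitions on each executor rather than captured from the driver. The sketch assumes EsQuery2 above and the project's EsRestUtils; unlike trans2 it also materializes the partition before closing the client, so the lazy iterator never touches a closed connection:

    import com.winhc.bigdata.spark.model.EsQuery2
    import com.winhc.bigdata.spark.utils.EsRestUtils
    import org.apache.spark.sql.SparkSession
    import org.elasticsearch.client.RestClient

    object PerPartitionClientSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()
        import spark.implicits._

        val categories = spark.createDataset(Seq("23537076", "24416401")).mapPartitions(iter => {
          //one client per partition, built on the executor
          val restClient: RestClient = EsRestUtils.getRestClient()
          val rows = iter.map(cid => {
            val m = EsQuery2.queryCompanyForCid(restClient, cid)
            (cid, m.getOrElse("category_first", ""))
          }).toList //materialize before closing the client
          restClient.close()
          rows.iterator
        })
        categories.show()
        spark.stop()
      }
    }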
+
+

+ 125 - 0
src/main/scala/com/winhc/bigdata/spark/model/CompanyEmploymentScore.scala

@@ -0,0 +1,125 @@
+package com.winhc.bigdata.spark.model
+
+import java.util.Date
+
+import com.winhc.bigdata.calc.DimScoreV2
+import com.winhc.bigdata.spark.utils.BaseUtil.atMonthsBefore
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, Maxcomputer2Hbase, SparkUtils}
+import org.apache.spark.sql.{Row, SparkSession}
+
+import scala.annotation.meta.getter
+import scala.collection.mutable
+
+/**
+ * Recruitment score
+ */
+case class CompanyEmploymentScore(s: SparkSession, sourceTable: String, tableView: String = "",
+                                  flag: String, time: String, kind: String, project: String,
+                                  tp: String = "0", namespace: String = ""
+                                 ) extends LoggingUtils {
+
+  @(transient@getter) val spark: SparkSession = s
+
+  import spark.implicits._
+
+  def calc(): Unit = {
+    println(s"${this.getClass.getSimpleName} calc start! " + new Date().toString)
+    //val targetTable = "ads_company_total_score"
+    val adsTable = namespace + ".ads_" + sourceTable
+    val incAdsTable = namespace + ".inc_ads_" + sourceTable
+    val targetTable = namespace + ".ads_" + sourceTable + "_score"
+    var ds = ""
+
+    //distinguish between partitioned and non-partitioned tables
+    var appsql2 = ""
+    var tb = adsTable
+    if ("1".equals(tp)) {
+      tb = tableView
+      ds = BaseUtil.getPartion(incAdsTable, spark)
+    } else {
+      ds = BaseUtil.getPartion(adsTable, spark)
+      appsql2 = s"AND  ds = ${ds}"
+    }
+
+    val df = sql(
+      s"""
+         |SELECT  *
+         |FROM    (
+         |        SELECT
+         |                *
+         |                ,COUNT(new_cid) OVER(PARTITION BY new_cid ) AS cnt1
+         |                ,SUM(CASE WHEN cast(start_date as string) >= '${atMonthsBefore(12)}' THEN 1 ELSE 0 END) OVER(PARTITION BY new_cid ) AS cnt2
+         |                ,ROW_NUMBER() OVER(PARTITION BY new_cid ORDER BY $time DESC ) AS num
+         |        FROM    $tb
+         |        WHERE   new_cid IS NOT NULL
+         |        ${appsql2}
+         |        ) a
+         |WHERE   num =1
+         |""".stripMargin)
+
+    df.map(r => {
+      trans(r, flag, kind, project)
+    }).toDF("id", "cid", "kind", "kind_code", "project", "project_code", "type",
+      "score", "total", "extraScore")
+      .createOrReplaceTempView(s"t1_view")
+
+    sql(s"insert overwrite table ${targetTable} " +
+      s"partition (ds='${ds}')  select * from t1_view")
+
+    //sync to hbase
+    if ("1".equals(tp)) { //the full (stock) run does not need the hbase sync
+      val dataFrame = sql(
+        s"""
+           |select
+           |CONCAT_WS('_',cid,project_code) AS rowkey,
+           |id,cid,kind,kind_code,project,project_code,type,score,total,extraScore
+           |from t1_view
+           |""".stripMargin)
+      Maxcomputer2Hbase(dataFrame, "COMPANY_SCORE").syn()
+    }
+    println(s"${this.getClass.getSimpleName} calc end! " + new Date().toString)
+  }
+
+  def trans(r: Row, flag: String, kind: String, prpject: String) = {
+    val id = r.getAs[Long]("id")
+    val cid = r.getAs[Long]("new_cid").toString
+    val cnt1 = r.getAs[Long]("cnt1")
+    val cnt2 = r.getAs[Long]("cnt2")
+    flag match {
+      case "302" => employmentScore(id, cid, cnt1,cnt2, kind, prpject)
+    }
+  }
+
+  //recruitment
+  def employmentScore(id: Long, cid: String, cnt1: Long, cnt2: Long, kind: String, project: String) = {
+    var score = 0f
+    val total = 5f
+    val extraScore = 0f
+    var ty = ""
+    if (cnt2 > 0) {
+      score = 5f
+      ty = "近一年内有招聘信息"
+    } else if (cnt1 > 0) {
+      score = 4f
+      ty = "有招聘信息,但是近1年内无招聘信息"
+    } else {
+      score = 3f
+      ty = "无招聘信息"
+    }
+    (id, cid, kind, DimScoreV2.newsEventMap.get(kind), project, DimScoreV2.newsEventMap.get(project), ty,
+      score, total, extraScore)
+  }
+
+}
+
+object CompanyEmploymentScore {
+  def main(args: Array[String]): Unit = {
+    val config = mutable.Map(
+      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "10"
+    )
+    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
+    CompanyEmploymentScore(spark, "company_employment","", "302", "start_date", "经营情况", "招聘", "0", "winhc_eci_dev").calc()
+    spark.stop()
+  }
+}
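
For a quick sanity check, the three employmentScore tiers restated as a pure function (a sketch; the kind/project lookups through DimScoreV2.newsEventMap are omitted):

    object EmploymentScoreSketch {
      //mirrors the branch logic of CompanyEmploymentScore.employmentScore
      def score(cnt1: Long, cnt2: Long): (Float, String) =
        if (cnt2 > 0) (5f, "近一年内有招聘信息")
        else if (cnt1 > 0) (4f, "有招聘信息,但是近1年内无招聘信息")
        else (3f, "无招聘信息")

      def main(args: Array[String]): Unit = {
        assert(score(cnt1 = 3, cnt2 = 1) == (5f, "近一年内有招聘信息"))
        assert(score(cnt1 = 3, cnt2 = 0) == (4f, "有招聘信息,但是近1年内无招聘信息"))
        assert(score(cnt1 = 0, cnt2 = 0) == (3f, "无招聘信息"))
      }
    }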

+ 49 - 3
src/main/scala/com/winhc/bigdata/spark/model/CompanyIntellectualsScore.scala

@@ -23,7 +23,9 @@ object CompanyIntellectualsScore {
       "company_patent_list" -> ("3;4", "pub_date", "资产权益", "实用新型、外观设计专利;发明专利"), //专利
       "company_icp" -> ("5", "examine_date", "资产权益", "网站"), //网站
       "company_tm" -> ("6", "app_date", "资产权益", "商标"), //商标
-      "company_land_announcement" -> ("7", "commit_time", "资产权益", "购地信息") //购地信息
+      "company_land_announcement" -> ("7", "commit_time", "资产权益", "购地信息"), //购地信息
+      "company_land_publicity" -> ("8", "publication_start_date", "资产权益", "土地公示"), //地块公示
+      "company_employment" -> ("208", "start_date", "经营情况", "招聘") //招聘
     )
 
   def main(args: Array[String]): Unit = {
@@ -40,7 +42,6 @@ object CompanyIntellectualsScore {
 
     //patents are split into two parts
     if (flag.contains(";")) {
-
       flag.split(";").foreach(f => {
         new CompanyIntellectualsScore(spark, sourceTable, "", f, time, kind, project, "0", namespace + ".").calc()
       })
@@ -77,7 +78,11 @@ object CompanyIntellectualsScore {
       //sys.exit(0)
       return
     }
-
+    //recruitment
+    if (flag.equals("302")) {
+      new CompanyEmploymentScore(spark, sourceTable, tableView, flag, time, kind, project, "1", namespace).calc()
+      return
+    }
     //patents are split into two parts
     if (flag.contains(";")) {
       flag.split(";").foreach(f => {
@@ -187,6 +192,7 @@ case class CompanyIntellectualsScore(s: SparkSession, sourceTable: String, table
       case "5" => webSiteScore(id, cid, cnt1, kind, prpject)
       case "6" => tradeMarkScore(id, cid, cnt1, kind, prpject)
       case "7" => immovableScore(id, cid, cnt1, kind, prpject)
+      case "8" => land_publicity(id, cid, cnt1, kind, prpject)
     }
   }
 
@@ -344,4 +350,44 @@ case class CompanyIntellectualsScore(s: SparkSession, sourceTable: String, table
       score, total, extraScore)
   }
 
+  //land publicity
+  def land_publicity(id: Long, cid: String, cnt1: Long, kind: String, project: String) = {
+    var score = 0f
+    val total = 15f
+    val extraScore = 0f
+    var ty = ""
+    if (cnt1 == 0) {
+      score = 7f
+      ty = "无"
+    } else if (cnt1 <= 2) {
+      score = 12f
+      ty = "≤2"
+    } else {
+      score = 15f
+      ty = ">2"
+    }
+    (id, cid, kind, DimScoreV2.newsEventMap.get(kind), project, DimScoreV2.newsEventMap.get(project), ty,
+      score, total, extraScore)
+  }
+
+  //recruitment
+  def company_employment(id: Long, cid: String, cnt1: Long, kind: String, project: String) = {
+    var score = 0f
+    val total = 5f
+    val extraScore = 0f
+    var ty = ""
+    if (cnt1 == 0) {
+      score = 3f
+      ty = "无招聘信息"
+    } else if (cnt1 <= 2) {
+      score = 4f
+      ty = "有招聘信息,但是近1年内无招聘信息"
+    } else {
+      score = 5f
+      ty = "近一年内有招聘信息"
+    }
+    (id, cid, kind, DimScoreV2.newsEventMap.get(kind), project, DimScoreV2.newsEventMap.get(project), ty,
+      score, total, extraScore)
+  }
+
 }
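
Similarly, the new land_publicity tiers as a pure function (a sketch, with the same caveat about the DimScoreV2 lookups):

    object LandPublicityScoreSketch {
      //mirrors the branch logic of CompanyIntellectualsScore.land_publicity
      def score(cnt1: Long): (Float, String) =
        if (cnt1 == 0) (7f, "无")
        else if (cnt1 <= 2) (12f, "≤2")
        else (15f, ">2")

      def main(args: Array[String]): Unit = {
        assert(score(0) == (7f, "无"))
        assert(score(2) == (12f, "≤2"))
        assert(score(5) == (15f, ">2"))
      }
    }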

+ 31 - 5
src/main/scala/com/winhc/bigdata/spark/utils/BaseUtil.scala

@@ -1,7 +1,7 @@
 package com.winhc.bigdata.spark.utils
 
 import java.text.SimpleDateFormat
-import java.util.regex.{Matcher, Pattern}
+import java.util.regex.Pattern
 import java.util.{Calendar, Date, Locale}
 import org.apache.commons.lang3.StringUtils
 import org.apache.commons.lang3.time.DateFormatUtils
@@ -83,12 +83,38 @@ object BaseUtil {
     ""
   }
 
-  def replaceChar(s: String): String = {
-    if (StringUtils.isNotBlank(s)) s.replaceAll("、", ",").replaceAll(";", ",").replaceAll(",", ",").replaceAll(" ", ",")
-    else ""
+  private val replace_char = "[^\\u4e00-\\u9fa5a-zA-Z\\(\\)()]+".r
+
+  def replaceChar(s: String) = {
+    if (StringUtils.isNotBlank(s)) {
+      val arr =
+        s.replaceAll("、", ",")
+          .replaceAll(";", ",")
+          .replaceAll(",", ",")
+          .replaceAll(" ", ",")
+          .replaceAll("。", ",")
+          .replaceAll(";", ",")
+          .replaceAll(":", ",")
+          .replaceAll("\\s+", ",").split(",")
+
+      arr.filter(_.length > 1).mkString(",")
+    } else {
+      ""
+    }
   }
 
   def main(args: Array[String]): Unit = {
-    println(atMonthsBefore(0,"yyyy-MM-dd HH:mm:ss"))
+    println(replaceChar(",x,"))
+    println(replaceChar("华为信息科技公司,。百度科技公司"))
+    println(replaceChar("2015)深南法蛇民初第883-887受理郑委,曹   连云,庄忠杰,曹元洪,曹硕"))
   }
 }
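
A quick trace of the new replaceChar against the inputs exercised in main (a sketch; expected values derived by hand from the code above):

    object ReplaceCharSketch {
      def main(args: Array[String]): Unit = {
        import com.winhc.bigdata.spark.utils.BaseUtil.replaceChar
        //single-character tokens ("x") are dropped by the length > 1 filter
        assert(replaceChar(",x,") == "")
        //full-width delimiters are normalized to "," and empty tokens removed
        assert(replaceChar("华为信息科技公司,。百度科技公司") == "华为信息科技公司,百度科技公司")
      }
    }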