瀏覽代碼

新增模型字段

xufei 4 年之前
父節點
當前提交
4a9c209180

+ 77 - 2
src/main/java/com/winhc/bigdata/calc/DimScoreV2.java

@@ -130,7 +130,7 @@ public class DimScoreV2 {
     // 企业类型得分
     public static EciScoreDetail econKindScore(String econKind, String isOnStock) {
         if (StrUtil.isAllBlank(econKind)) {
-            return new EciScoreDetail(Dict.ECI_KIND.基本情况.getCode(), "企业状态", econKind, 9, 15);
+            return new EciScoreDetail(Dict.ECI_KIND.基本情况.getCode(), "企业类型", econKind, 9, 15);
         }
         float score = 6;
         float extScore = 0;
@@ -268,8 +268,10 @@ public class DimScoreV2 {
     public static HashMap<String, String> bean2Map(EciScoreDetail s) {
 
         HashMap<String, String> m = new HashMap<>();
-        m.put("kind", s.getKind().toString());
+        m.put("kind", newsEventMap.getOrDefault(s.getKind().toString(),"-1"));
+        m.put("kind_code", s.getKind().toString());
         m.put("project", s.getProject());
+        m.put("project_code", newsEventMap.getOrDefault(s.getProject(),"-1"));
         m.put("type", s.getType());
         m.put("score", String.valueOf(s.getScore()));
         m.put("total", String.valueOf(s.getTotal()));
@@ -277,6 +279,79 @@ public class DimScoreV2 {
         return m;
     }
 
+    public static HashMap<String, String> newsEventMap = new HashMap<String, String>(){{ // lookup table read by bean2Map via getOrDefault(key, "-1"); holds two kinds of entries in one map
+        // top-level category entries: category code -> category name
+        put("1","基本情况");
+        put("2","经营情况");
+        put("3","资产权益");
+        put("4","经营风险");
+        put("5","法律风险");
+
+        // project (sub-category) entries: project name -> project code
+        // basic information (1xx)
+        put("注册资本","101");
+        put("实缴资本","102");
+        put("所属行业","103");
+        put("企业类型","104");
+        put("企业状态","105");
+        put("成立日期","106");
+        put("营业期限","107");
+        put("注册地址","108");
+        // business operations (2xx)
+        put("人员规模/参保人数","201");
+        put("最终受益人","202");
+        put("疑似实际控制人","203");
+        put("法定代表人","204");
+        put("抽查检查","205");
+        put("年报信息","206");
+        put("分支机构","207");
+        put("招聘","208");
+        put("税务信用","209");
+        put("融资信息","210");
+        put("债券信息","211");
+        put("特殊资质","212");
+        // assets and equity (3xx)
+        put("对外投资","301");
+        put("增资记录","302");
+        put("股权出质(质权人)","303");
+        put("土地公示","304");
+        put("购地信息","305");
+        put("招投标","306");
+        put("实用新型、外观设计专利","307");
+        put("发明专利","308");
+        put("商标","309");
+        put("软著作权","310"); // NOTE(review): possibly a typo for "软件著作权" — confirm against the project names the scorers emit; a mismatch makes bean2Map fall back to "-1"
+        put("著作权","311");
+        put("网站","312");
+        // operational risk (4xx)
+        put("解散清算","401");
+        put("简易注销","402");
+        put("严重违法行为","403");
+        put("减资记录","404");
+        put("土地转让","405");
+        put("土地抵押","406");
+        put("动产","407");
+        put("股权出质(出质人)","408");
+        put("经营异常","409");
+        put("行政处罚","410");
+        put("环保处罚","411");
+        put("税收违法","412");
+        put("欠税公告","413");
+        put("公示催告","414");
+        // legal risk (5xx)
+        put("被执行情况","501");
+        put("失信情况","502");
+        put("历史失信情况","503");
+        put("裁判文书","504");
+        put("破产重整","505");
+        put("司法拍卖","506");
+        put("开庭公告","507");
+        put("股权冻结","508");
+        put("法院公告","509");
+        put("立案信息","510");
+    }};
+
+
     public static void main(String[] args) { // entry point with an empty body; running it does nothing
 
     }

+ 1 - 1
src/main/scala/com/winhc/bigdata/spark/jobs/CompanyCopyrightReg.scala

@@ -62,7 +62,7 @@ object CompanyCopyrightReg {
          |        ) c
          |LEFT JOIN company_name_mapping d
          |ON      c.cid = d.cid
-         |""".stripMargin)
+         |""".stripMargin).dropDuplicates("reg_num", "full_name","res_cid")
       .createOrReplaceTempView(s"t2")
 
     //聚合新cids

+ 24 - 31
src/main/scala/com/winhc/bigdata/spark/jobs/CompanyInfoCalculatorV2.scala

@@ -1,14 +1,13 @@
 package com.winhc.bigdata.spark.jobs
 
+import java.util
 import java.util.Date
-
 import com.winhc.bigdata.calc.{DimScore, DimScoreV2}
 import com.winhc.bigdata.spark.utils.SparkUtils
 import org.apache.commons.lang3.StringUtils
 import org.apache.commons.logging.LogFactory
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.sql.{Row, SparkSession}
-
 import scala.collection.mutable
 
 object CompanyInfoCalculatorV2 {
@@ -23,7 +22,7 @@ object CompanyInfoCalculatorV2 {
     }
 
     var config = mutable.Map.empty[String, String]
-    val Array(instances, cores, memory) = args;
+    val Array(instances, cores, memory) = args
 
     println(
       s"""
@@ -44,28 +43,30 @@ object CompanyInfoCalculatorV2 {
 
     println("company calc start! " + new Date().toString)
 
-    val tableName = "new_ods_company"
-    val resultTable = "ads_company_score_v3"
+    val ods_company = "new_ods_company"
+    val company_score = "ads_company_score_v3"
+    val company_category = "const_company_category_code"
+    val company_stock = "ods_company_stock"
 
     //所属行业
     val code2Name: Broadcast[Map[String, String]] = spark.sparkContext.broadcast(sql(
-      """
+      s"""
         |select category_code,category_str_big
-        |from const_company_category_code
+        |from $company_category
       """.stripMargin).collect().map(r => {
       (r.getString(0), r.getString(1))
     }).toMap)
 
     //上市信息
-    val stock: Broadcast[Map[Long, String]] = spark.sparkContext.broadcast(sql(
-      """
+    val stock: Broadcast[Map[String, String]] = spark.sparkContext.broadcast(sql(
+      s"""
         |select cid,name
-        |from ods_company_stock
+        |from $company_stock
       """.stripMargin).collect().map(r => {
-      (r.getLong(0), "1")
+      (r.getLong(0).toString, "1")
     }).toMap)
 
-    val df = sql(
+    sql(
       s"""
          |SELECT  id,cid,name
          |        ,reg_capital
@@ -81,25 +82,24 @@ object CompanyInfoCalculatorV2 {
          |        ,CAST(from_time AS STRING) from_time
          |        ,CAST(to_time AS STRING) to_time
          |        ,reg_location
-         |FROM    ${tableName}
+         |FROM    ${ods_company}
+         |where   cid is not null
          |""".stripMargin).flatMap(r => {
       trans(stock, code2Name, r)
-    }).toDF("id", "cid", "name", "kind", "project", "type", "score", "total", "extraScore")
+    }).toDF("id", "cid", "name", "kind", "kind_code", "project", "project_code", "type", "score", "total", "extraScore")
+      .createOrReplaceTempView(s"${ods_company}_tmp_view")
 
-    // 写 分区表
-    df.createOrReplaceTempView(s"${tableName}_tmp_view")
-    sql(s"insert overwrite table ${resultTable}  select * from ${tableName}_tmp_view")
-    //    df.show(100)
+    sql(s"insert overwrite table ${company_score}  select * from ${ods_company}_tmp_view")
     println("company calc end! " + new Date().toString)
 
     spark.stop();
   }
 
 
-  private def trans(stock: Broadcast[Map[Long, String]], code2Name: Broadcast[Map[String, String]], r: Row) = {
+  private def trans(stock: Broadcast[Map[String, String]], code2Name: Broadcast[Map[String, String]], r: Row) = {
 
     val id = r.getAs[Long]("id")
-    val cid = r.getAs[Long]("cid")
+    val cid = r.getAs[Long]("cid").toString
     val name = r.getAs[String]("name")
     val reg_capital = r.getAs[String]("reg_capital")
     val actual_capital_amount = r.getAs[Long]("actual_capital_amount")
@@ -112,7 +112,7 @@ object CompanyInfoCalculatorV2 {
     val reg_location = r.getAs[String]("reg_location")
 
     var actual_capital = ""
-    //实缴资本转换
+    // actual_capital_amount is stored in fen (分); divide by 100 to express it in yuan (元)
     if (actual_capital_amount > 0 && StringUtils.isNotBlank(actual_capital_currency)) {
       actual_capital = actual_capital_amount / 100 + "元" + actual_capital_currency
     }
@@ -126,15 +126,8 @@ object CompanyInfoCalculatorV2 {
     val r7 = DimScoreV2.bean2Map(DimScoreV2.termScore(to_time))
     val r8 = DimScoreV2.bean2Map(DimScoreV2.addressScore(reg_location))
 
-    Seq(
-      (id, cid, name, r1.get("kind"), r1.get("project"), r1.get("type"), r1.get("score"), r1.get("total"), r1.get("extraScore")),
-      (id, cid, name, r2.get("kind"), r2.get("project"), r2.get("type"), r2.get("score"), r2.get("total"), r2.get("extraScore")),
-      (id, cid, name, r3.get("kind"), r3.get("project"), r3.get("type"), r3.get("score"), r3.get("total"), r3.get("extraScore")),
-      (id, cid, name, r4.get("kind"), r4.get("project"), r4.get("type"), r4.get("score"), r4.get("total"), r4.get("extraScore")),
-      (id, cid, name, r5.get("kind"), r5.get("project"), r5.get("type"), r5.get("score"), r5.get("total"), r5.get("extraScore")),
-      (id, cid, name, r6.get("kind"), r6.get("project"), r6.get("type"), r6.get("score"), r6.get("total"), r6.get("extraScore")),
-      (id, cid, name, r7.get("kind"), r7.get("project"), r7.get("type"), r7.get("score"), r7.get("total"), r7.get("extraScore")),
-      (id, cid, name, r8.get("kind"), r8.get("project"), r8.get("type"), r8.get("score"), r8.get("total"), r8.get("extraScore"))
-    )
+    Seq(r1, r2, r3, r4, r5, r6, r7, r8)
+      .map(m => (id, cid, name, m.get("kind"), m.get("kind_code"), m.get("project"), m.get("project_code"),
+        m.get("type"), m.get("score").toFloat, m.get("total").toFloat, m.get("extraScore").toFloat))
   }
 }