|
@@ -3,7 +3,9 @@ package com.winhc.bigdata.spark.model
|
|
|
import java.util.Date
|
|
|
|
|
|
import com.winhc.bigdata.calc.DimScoreV2
|
|
|
-import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
|
|
|
+import com.winhc.bigdata.spark.udf.CompanyMapping
|
|
|
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, Maxcomputer2Hbase, SparkUtils}
|
|
|
+import org.apache.commons.lang3.StringUtils
|
|
|
import org.apache.spark.sql.{Row, SparkSession}
|
|
|
|
|
|
import scala.annotation.meta.getter
|
|
@@ -18,67 +20,107 @@ object CompanyCourtAnnouncement {
|
|
|
val tabMapping: Map[String, (String, String, String, String)] = // source table -> (flag, time column, kind, project)
|
|
|
Map("ads_company_court_announcement_list" -> ("1", "publish_date", "法律风险", "法院公告"), // court announcements
|
|
|
"ads_company_court_open_announcement_list" -> ("2", "start_date", "法律风险", "开庭公告"), // court hearing announcements
|
|
|
- "ads_company_court_register_list" -> ("3", "filing_date", "法律风险", "立案信息"), // case filing information
|
|
|
- "ads_company_lawsuit_list" -> ("4", "judge_time", "法律风险", "裁判文书") // judgment documents
|
|
|
+ "ads_company_court_register_list" -> ("3", "filing_date", "法律风险", "立案信息") // case filing information
|
|
|
+ //,"ads_company_lawsuit_list" -> ("4", "judge_time", "法律风险", "裁判文书") // judgment documents (disabled) // todo
|
|
|
)
|
|
|
|
|
|
def main(args: Array[String]): Unit = { // entry point: runs calc() for each of the three court tables, then stops Spark
|
|
|
|
|
|
- val (sourceTable, flag, time, kind, project) = valid(args)
|
|
|
-
|
|
|
- var config = mutable.Map.empty[String, String]
|
|
|
+ val namespace = "winhc_eci_dev"
|
|
|
+ val config = mutable.Map(
|
|
|
+ "spark.hadoop.odps.project.name" -> s"$namespace",
|
|
|
+ "spark.hadoop.odps.spark.local.partition.amt" -> "1000"
|
|
|
+ )
|
|
|
|
|
|
val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
|
|
|
+ // court announcements (flag "509"); tp "0" reads the ads_ table partition and skips the HBase sync
|
|
|
+ CompanyCourtAnnouncement(spark, "company_court_announcement_list", "", "509", "update_time", "法律风险", "法院公告", "0", s"$namespace").calc()
|
|
|
+ // court hearing announcements (flag "507")
|
|
|
+ CompanyCourtAnnouncement(spark, "company_court_open_announcement_list", "", "507", "update_time", "法律风险", "开庭公告", "0", s"$namespace").calc()
|
|
|
+ // case filing information (flag "510")
|
|
|
+ CompanyCourtAnnouncement(spark, "company_court_register_list", "", "510", "update_time", "法律风险", "立案信息", "0", s"$namespace").calc()
|
|
|
|
|
|
- new CompanyCourtAnnouncement(spark, sourceTable, flag, time, kind, project).calc()
|
|
|
spark.stop()
|
|
|
|
|
|
}
|
|
|
|
|
|
- def valid(args: Array[String]) = {
|
|
|
- if (args.length != 1) {
|
|
|
- println("请输入要计算的table!!!! ")
|
|
|
- sys.exit(-1)
|
|
|
- }
|
|
|
- val sourceTable = args(0)
|
|
|
-
|
|
|
- val (flag, time, kind, project) = tabMapping.getOrElse(sourceTable, ("", "", "", ""))
|
|
|
- if (flag.isEmpty || time.isEmpty || kind.isEmpty || project.isEmpty) {
|
|
|
- println("输入表不存在!!! ")
|
|
|
- sys.exit(-1)
|
|
|
- }
|
|
|
- (sourceTable, flag, time, kind, project)
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
-case class CompanyCourtAnnouncement(s: SparkSession, sourceTable: String,
|
|
|
- flag: String, time: String, kind: String, project: String
|
|
|
- ) extends LoggingUtils {
|
|
|
+case class CompanyCourtAnnouncement(s: SparkSession, sourceTable: String, tableView: String = "",
|
|
|
+ flag: String, time: String, kind: String, project: String,
|
|
|
+ tp: String, namespace: String
|
|
|
+ ) extends LoggingUtils with CompanyMapping {
|
|
|
|
|
|
@(transient@getter) val spark: SparkSession = s
|
|
|
|
|
|
import spark.implicits._
|
|
|
|
|
|
+ val nameMapping: Map[String, (String, String)] = // source table -> (plaintiff column, defendant column) used by calc() in name_judge
|
|
|
+ Map("company_court_announcement_list" -> ("plaintiff", "litigant"), // court announcements
|
|
|
+ "company_court_open_announcement_list" -> ("plaintiff", "defendant"), // court hearing announcements
|
|
|
+ "company_court_register_list" -> ("plaintiff", "defendant") // case filing information
|
|
|
+ )
|
|
|
+
|
|
|
def calc(): Unit = {
|
|
|
println(s"company ${this.getClass.getSimpleName} calc start! " + new Date().toString)
|
|
|
|
|
|
+ prepareFunctions(spark)
|
|
|
+
|
|
|
+ val adsTable = namespace + ".ads_" + sourceTable
|
|
|
+ val incAdsTable = namespace + ".inc_ads_" + sourceTable
|
|
|
+ val targetTable = namespace + ".ads_" + sourceTable + "_score"
|
|
|
+ var ds = ""
|
|
|
+
|
|
|
+ //最近三个月内
|
|
|
var sqlapp = ""
|
|
|
- if (!"4".equals(flag)) {
|
|
|
- sqlapp = s"and $time >= '${BaseUtil.atMonthsBefore(3)}'"
|
|
|
+ sqlapp = s"and $time >= '${BaseUtil.atMonthsBefore(3)}'"
|
|
|
+
|
|
|
+ val company_mapping = s"$namespace.base_company_mapping"
|
|
|
+ val mapping_ds = BaseUtil.getPartion(company_mapping, spark)
|
|
|
+
|
|
|
+ // Select the relation to read and its partition filter:
|
|
|
+ // tp == "1" reads the caller-supplied tableView with no ds filter (ds is taken from the inc_ads table);
|
|
|
+ // any other tp reads the ads table restricted to the partition returned by BaseUtil.getPartion.
|
|
|
+ var appsql2 = ""
|
|
|
+ var tb = adsTable
|
|
|
+ if ("1".equals(tp)) {
|
|
|
+ tb = tableView
|
|
|
+ ds = BaseUtil.getPartion(incAdsTable, spark)
|
|
|
+ } else {
|
|
|
+ ds = BaseUtil.getPartion(adsTable, spark)
|
|
|
+ // Quote ds as a string literal, like the other ds predicates in this job, so the
|
|
|
+ // partition column is compared as a string instead of relying on implicit casting.
|
|
|
+ appsql2 = s"AND ds = '${ds}'"
|
|
|
+ }
|
|
|
+
|
|
|
+ // Resolve the plaintiff / defendant column names for this source table.
|
|
|
+ val (yg_name, bg_name) = nameMapping.getOrElse(sourceTable, ("", ""))
|
|
|
+ // Abort when either column is missing (the original tested yg_name twice and never bg_name).
|
|
|
+ if (StringUtils.isBlank(yg_name) || StringUtils.isBlank(bg_name)) {
|
|
|
+ println("no table mapping ....")
|
|
|
+ sys.exit(-1)
|
|
|
}
|
|
|
|
|
|
val df = sql(
|
|
|
s"""
|
|
|
|SELECT *
|
|
|
|FROM (
|
|
|
- | SELECT *
|
|
|
- | ,sum(CASE WHEN party_role = 'y' THEN 1 ELSE 0 END) OVER(PARTITION BY new_cid) AS cnt1
|
|
|
- | ,sum(CASE WHEN party_role = 'n' THEN 1 ELSE 0 END) OVER(PARTITION BY new_cid) AS cnt2
|
|
|
+ | SELECT sum(CASE WHEN role = 'y' THEN 1 ELSE 0 END) OVER(PARTITION BY new_cid) AS cnt1
|
|
|
+ | ,sum(CASE WHEN role = 'b' THEN 1 ELSE 0 END) OVER(PARTITION BY new_cid) AS cnt2
|
|
|
| ,row_number() OVER(PARTITION BY new_cid ORDER BY $time DESC) AS num
|
|
|
- | FROM $sourceTable
|
|
|
- | WHERE ds = '${BaseUtil.getPartion(sourceTable, spark)}' and new_cid is not null
|
|
|
- | ${sqlapp}
|
|
|
- | ) a
|
|
|
+ | ,*
|
|
|
+ | FROM (
|
|
|
+ | SELECT b.cname
|
|
|
+ | ,name_judge(coalesce(cleanup(b.cname),''),cleanup($yg_name),cleanup($bg_name)) role
|
|
|
+ | ,a.*
|
|
|
+ | FROM (
|
|
|
+ | SELECT *
|
|
|
+ | FROM $tb
|
|
|
+ | WHERE new_cid IS NOT NULL ${appsql2} ${sqlapp}
|
|
|
+ | ) a
|
|
|
+ | LEFT JOIN (
|
|
|
+ | SELECT *
|
|
|
+ | FROM $company_mapping
|
|
|
+ | WHERE ds = '$mapping_ds'
|
|
|
+ | ) b
|
|
|
+ | ON a.cid = b.cid
|
|
|
+ | ) c
|
|
|
+ | ) d
|
|
|
|WHERE num = 1
|
|
|
|""".stripMargin)
|
|
|
|
|
@@ -86,16 +128,25 @@ case class CompanyCourtAnnouncement(s: SparkSession, sourceTable: String,
|
|
|
trans(r, flag, kind, project)
|
|
|
}).toDF("id", "cid", "kind", "kind_code", "project", "project_code", "type",
|
|
|
"score", "total", "extraScore")
|
|
|
- .createOrReplaceTempView(s"${sourceTable}_tmp_view")
|
|
|
+ .createOrReplaceTempView(s"t1_view_${sourceTable}")
|
|
|
|
|
|
- logInfo(
|
|
|
- s"""
|
|
|
- |- - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
- |${showString(sql(s"select * from ${sourceTable}_tmp_view"))}
|
|
|
- |- - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
- """.stripMargin)
|
|
|
+ sql(s"select * from t1_view_${sourceTable}").show(20, false)
|
|
|
+
|
|
|
+ sql(s"insert overwrite table ${targetTable} " +
|
|
|
+ s"partition (ds='${ds}') select * from t1_view_${sourceTable}")
|
|
|
|
|
|
- sql(s"insert overwrite table ${sourceTable}_score select * from ${sourceTable}_tmp_view")
|
|
|
+ //同步hbase
|
|
|
+ if ("1".equals(tp)) { //存量计算不用同步hbase
|
|
|
+ val dataFrame = sql(
|
|
|
+ s"""
|
|
|
+ |select
|
|
|
+ |CONCAT_WS('_',cid,project_code) AS rowkey,
|
|
|
+ |id,cid,kind,kind_code,project,project_code,type,score,total,extraScore
|
|
|
+ |from t1_view_${sourceTable}
|
|
|
+ |""".stripMargin)
|
|
|
+
|
|
|
+ Maxcomputer2Hbase(dataFrame, "COMPANY_SCORE").syn()
|
|
|
+ }
|
|
|
|
|
|
println(s"company ${this.getClass.getSimpleName} calc end! " + new Date().toString)
|
|
|
}
|
|
@@ -107,10 +158,10 @@ case class CompanyCourtAnnouncement(s: SparkSession, sourceTable: String,
|
|
|
val cnt1 = r.getAs[Long]("cnt1")
|
|
|
val cnt2 = r.getAs[Long]("cnt2")
|
|
|
flag match {
|
|
|
- case "1" => getInfoAnnouncement(id, cid, cnt1, cnt2, kind, prpject)
|
|
|
- case "2" => getInfoOpenAnnouncement(id, cid, cnt1, cnt2, kind, prpject)
|
|
|
- case "3" => getInforegister(id, cid, cnt1, cnt2, kind, prpject)
|
|
|
- case "4" => getRefereeScore(id, cid, cnt1, cnt2, kind, prpject)
|
|
|
+ case "509" => getInfoAnnouncement(id, cid, cnt1, cnt2, kind, prpject)
|
|
|
+ case "507" => getInfoOpenAnnouncement(id, cid, cnt1, cnt2, kind, prpject)
|
|
|
+ case "510" => getInforegister(id, cid, cnt1, cnt2, kind, prpject)
|
|
|
+ //case "4" => getRefereeScore(id, cid, cnt1, cnt2, kind, prpject)
|
|
|
}
|
|
|
}
|
|
|
|