|
@@ -0,0 +1,205 @@
|
|
|
+package com.winhc.bigdata.spark.ng.credit_punishment.udf
|
|
|
+
|
|
|
+import com.alibaba.fastjson.{JSON, JSONPath}
|
|
|
+import com.winhc.bigdata.spark.utils.{BaseUtil, RegCapitalAmount}
|
|
|
+import org.apache.commons.lang3.StringUtils
|
|
|
+import org.apache.spark.sql.Row
|
|
|
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
|
|
|
+import org.apache.spark.sql.types._
|
|
|
+
|
|
|
+import java.sql.Timestamp
|
|
|
+import java.text.DecimalFormat
|
|
|
+import java.time.ZoneId
|
|
|
+import java.time.format.DateTimeFormatter
|
|
|
+import java.util.Locale
|
|
|
+
|
|
|
/**
 * Aggregate function that folds the credit-punishment records of one group into a
 * single `Map[String, String]` summary.
 *
 * Input columns (see [[inputSchema]]): `rowkey`, `tn` (source table name), `keyno`,
 * `card_num`, `case_create_time`, `deleted` and `detail_data` (a JSON document).
 *
 * Rows are handled as follows:
 *  - rows whose `tn` is not one of the four known tables, or whose `deleted` flag is
 *    NULL or anything other than 0/1, are ignored;
 *  - rows with `deleted = 0` contribute their rowkey, card number, keyno, label,
 *    earliest case-creation time and `$.no_exec_amount`;
 *  - rows with `deleted = 0` or `deleted = 1` contribute to the per-table amount totals.
 *
 * @author: XuJiakai
 * @date: 2021/5/11 09:23
 */
case class CreditPunishmentCaseAggUDF() extends UserDefinedAggregateFunction {

  /** Source table name -> short numeric code used as the prefix of the emitted rowkeys. */
  private val tn_map = Map(
    "company_dishonest_info" -> "1"
    , "company_zxr" -> "2"
    , "company_zxr_final_case" -> "3"
    , "company_zxr_restrict" -> "4"
  )

  /** Source table name -> display label emitted in the `label` field. */
  private val tn_name_map = Map(
    "company_dishonest_info" -> "失信被执行人"
    , "company_zxr" -> "被执行人"
    , "company_zxr_final_case" -> "终本案件"
    , "company_zxr_restrict" -> "限制高消费"
  )

  /** Separator between the table name (or table code) and the rowkey. */
  private val delimiter = "@@"

  /** Schema of the seven arguments passed to the aggregate. */
  override def inputSchema: StructType = StructType(Array[StructField](
    StructField("rowkey", StringType)
    , StructField("tn", StringType)
    , StructField("keyno", StringType)
    , StructField("card_num", StringType)
    , StructField("case_create_time", TimestampType)
    , StructField("deleted", LongType)
    , StructField("detail_data", StringType)
  ))

  /** Schema of the intermediate buffer; the trailing numbers are the slot indices. */
  override def bufferSchema: StructType = StructType(Array(
    StructField("rowkey", ArrayType(StringType, containsNull = false)) // 0
    , StructField("card_num", ArrayType(StringType, containsNull = false)) // 1
    , StructField("keyno", ArrayType(StringType, containsNull = false)) // 2
    , StructField("label", ArrayType(StringType, containsNull = false)) // 3
    , StructField("case_create_time", TimestampType) // 4
    , StructField("total_exec_amount", DoubleType) // cumulative total amount under execution, 5
    , StructField("total_no_exec_amount", DoubleType) // suspected total amount currently owed, 6
    , StructField("zxr_total_exec_amount", DoubleType) // total amount currently under execution (company_zxr), 7
    , StructField("final_case_exec_amount", DoubleType) // total execution target amount of final cases, 8
    , StructField("final_case_no_exec_amount", DoubleType) // total unfulfilled amount of final cases, 9
  ))

  /** The result is a string-to-string map; see [[evaluate]] for the keys. */
  override def dataType: DataType = DataTypes.createMapType(StringType, StringType)

  // `card_num` / `keyno` are taken from the first collected element, so the result
  // depends on the order in which rows and partial buffers arrive.
  override def deterministic: Boolean = false

  /** Resets the buffer: empty lists, no timestamp, all amounts zero. */
  override def initialize(buffer: MutableAggregationBuffer): Unit = {
    (0 to 3).foreach(slot => buffer.update(slot, Seq.empty[String]))
    buffer.update(4, null)
    (5 to 9).foreach(slot => buffer.update(slot, 0d))
  }

  /**
   * Folds one input row into the buffer.
   *
   * Rows with an unknown/NULL `tn` or a `deleted` flag other than 0 or 1 (including
   * NULL) are skipped: they can neither be labelled nor keyed in [[evaluate]].
   *
   * NOTE(review): the per-table amounts (slots 5, 7, 8, 9) are accumulated for both
   * `deleted = 0` and `deleted = 1` rows, and duplicates are not removed before
   * summing — this mirrors the existing behaviour; confirm it is intended.
   */
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    val tn = input.getString(1)
    // Row.getLong throws on NULL, so test for NULL before reading the flag.
    val deleted: Option[Long] = if (input.isNullAt(5)) None else Some(input.getLong(5))
    val isActive = deleted.contains(0L)
    val isHistorical = deleted.contains(1L)
    val knownTable = tn != null && tn_map.contains(tn)

    if (knownTable && (isActive || isHistorical)) {
      val detailData = input.getString(6)

      // Only non-deleted rows are listed; historical rows are excluded here.
      if (isActive) {
        buffer(0) = s"$tn$delimiter${input.getString(0)}" +: buffer.getSeq[String](0)
        // The buffer arrays are declared containsNull = false, so never store a null.
        Option(input.getString(3)).foreach(cardNum => buffer(1) = cardNum +: buffer.getSeq[String](1))
        Option(input.getString(2)).foreach(keyno => buffer(2) = keyno +: buffer.getSeq[String](2))
        buffer(3) = tn +: buffer.getSeq[String](3)
        buffer(4) = getCaseCreateTime(buffer.getTimestamp(4), input.getTimestamp(4))
        addToSlot(buffer, 6, getAmount(detailData, "$.no_exec_amount"))
      }

      tn match {
        case "company_zxr" =>
          val execMoney = getAmount(detailData, "$.exec_money")
          addToSlot(buffer, 5, execMoney)
          addToSlot(buffer, 7, execMoney)
        case "company_zxr_final_case" =>
          addToSlot(buffer, 8, getAmount(detailData, "$.exec_money"))
          addToSlot(buffer, 9, getAmount(detailData, "$.no_exec_amount"))
        case _ => // the remaining tables carry no amounts that are aggregated here
      }
    }
  }

  /** Combines two partial buffers: lists are concatenated, the earlier time wins, amounts are added. */
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
    (0 to 3).foreach { slot =>
      buffer1(slot) = buffer1.getSeq[String](slot) ++ buffer2.getSeq[String](slot)
    }
    buffer1(4) = getCaseCreateTime(buffer1.getTimestamp(4), buffer2.getTimestamp(4))
    (5 to 9).foreach { slot =>
      buffer1(slot) = buffer1.getDouble(slot) + buffer2.getDouble(slot)
    }
  }

  /**
   * Builds the final summary map.
   *
   * Keys: `rowkey` (comma-joined `<table code>@@<rowkey>`), `card_num`, `keyno`,
   * `label` (comma-joined), `case_create_time` (`yyyy-MM-dd` in the system default
   * zone, or null), `record_num`, the five amount totals, one `<table>_num` count per
   * source table, and `deleted` ("1" when no active record was collected, else "0").
   */
  override def evaluate(buffer: Row): Any = {
    // Each entry was stored as "<tn>@@<rowkey>". The limit of 2 keeps an empty rowkey
    // as "" (instead of dropping it) and keeps any "@@" inside the rowkey intact.
    val entries: Seq[(String, String)] = buffer.getSeq[String](0).distinct
      .filter(entry => StringUtils.isNotBlank(entry))
      .map(_.split(delimiter, 2))
      .collect { case Array(table, key) => (table, key) }

    val tnDistribution: Map[String, Int] =
      entries.groupBy(_._1).map { case (table, group) => table -> group.size }

    val rowkey: Seq[String] = entries.flatMap { case (table, key) =>
      tn_map.get(table).map(code => s"$code$delimiter$key")
    }

    val card_num: Seq[String] = buffer.getSeq[String](1).distinct
      .filter(BaseUtil.is_id_card(_)).map(BaseUtil.id_card_trim).filter(StringUtils.isNotBlank)

    val keyno = buffer.getSeq[String](2).distinct.filter(value => StringUtils.isNotBlank(value))
    val label = buffer.getSeq[String](3).distinct.flatMap(table => tn_name_map.get(table))

    val dateFormat = DateTimeFormatter.ofPattern("yyyy-MM-dd")
      .withLocale(Locale.CHINA)
      .withZone(ZoneId.systemDefault)
    val cct = Option(buffer.getTimestamp(4)).map(ts => dateFormat.format(ts.toInstant)).orNull

    def tableCount(table: String): String = tnDistribution.getOrElse(table, 0).toString

    Map(
      "rowkey" -> rowkey.mkString(",")
      , "card_num" -> card_num.headOption.orNull
      , "keyno" -> keyno.headOption.orNull
      , "label" -> label.mkString(",")
      , "case_create_time" -> cct
      , "record_num" -> rowkey.length.toString
      , "total_exec_amount" -> double2String(buffer.getDouble(5))
      , "total_no_exec_amount" -> double2String(buffer.getDouble(6))
      , "zxr_total_exec_amount" -> double2String(buffer.getDouble(7))
      , "final_case_exec_amount" -> double2String(buffer.getDouble(8))
      , "final_case_no_exec_amount" -> double2String(buffer.getDouble(9))

      , "company_dishonest_info_num" -> tableCount("company_dishonest_info")
      , "company_zxr_num" -> tableCount("company_zxr")
      , "company_zxr_final_case_num" -> tableCount("company_zxr_final_case")
      , "company_zxr_restrict_num" -> tableCount("company_zxr_restrict")

      , "deleted" -> (if (rowkey.isEmpty) "1" else "0")
    )
  }

  /** Adds `amount` to the double stored in buffer slot `slot`. */
  private def addToSlot(buffer: MutableAggregationBuffer, slot: Int, amount: Double): Unit =
    buffer(slot) = buffer.getDouble(slot) + amount

  /**
   * Reads the value at `jsonPath` from `json` and converts it to an amount.
   *
   * Any non-fatal failure — malformed JSON, a missing field, an unparsable value —
   * yields `0d`, so a single bad record cannot fail the whole aggregation.
   *
   * @param json     the `detail_data` JSON document; may be null
   * @param jsonPath fastjson JSONPath expression of the field to read
   * @return the parsed amount divided by 100, or `0d` when it cannot be determined
   */
  private def getAmount(json: String, jsonPath: String): Double = {
    import scala.util.Try
    Try {
      val raw = JSONPath.eval(JSON.parseObject(json), jsonPath)
      // The field may be stored as a JSON number rather than a string; stringify
      // instead of casting so that such values are parsed as well.
      val text = Option(raw).map(_.toString).orNull
      // NOTE(review): presumably RegCapitalAmount returns the amount in 1/100 units — confirm.
      RegCapitalAmount.getAmount(text).toDouble / 100
    }.getOrElse(0d)
  }

  /**
   * Returns the earlier of the two timestamps; a null argument is treated as absent.
   *
   * @return null only when both arguments are null
   */
  private def getCaseCreateTime(time1: Timestamp, time2: Timestamp): Timestamp =
    (Option(time1), Option(time2)) match {
      case (Some(first), Some(second)) => if (second.compareTo(first) > 0) first else second
      case (first, second) => first.orElse(second).orNull
    }

  /** Formatter for amounts: at most two fraction digits, no trailing zeros. */
  private val dof = new DecimalFormat("0.##")

  /** Renders an amount using [[dof]]. */
  private def double2String(d: Double): String = dof.format(d)
}
|