ソースを参照

扩充日志信息

xufei 5 年 前
コミット
ec980aa590

+ 40 - 14
src/main/scala/com/winhc/bigdata/spark/jobs/CompanyCourtAnnouncement.scala

@@ -1,42 +1,68 @@
 package com.winhc.bigdata.spark.jobs
 
 import java.util.Date
-import com.winhc.bigdata.spark.utils.{BaseUtil, SparkUtils}
+
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
 import org.apache.spark.sql.{Row, SparkSession}
 import com.winhc.bigdata.calc.DimScoreV2
+import com.winhc.bigdata.spark.jobs.CompanyCourtAnnouncement.tabMapping
+
+import scala.annotation.meta.getter
 import scala.collection.mutable
 
 /**
  * 法院公告,开庭公告,立案信息
  */
+
 object CompanyCourtAnnouncement {
 
-  val tabMapping = Map("ads_company_court_announcement_list" -> ("1", "publish_date", "法律风险", "法院公告"), //法院公告
-    "ads_company_court_open_announcement_list" -> ("2", "start_date", "法律风险", "开庭公告"), //开庭公告
-    "ads_company_court_register_list" -> ("3", "filing_date", "法律风险", "立案信息") //立案信息
-  )
+  val tabMapping: Map[String, (String, String, String, String)] =
+    Map("ads_company_court_announcement_list" -> ("1", "publish_date", "法律风险", "法院公告"), //法院公告
+      "ads_company_court_open_announcement_list" -> ("2", "start_date", "法律风险", "开庭公告"), //开庭公告
+      "ads_company_court_register_list" -> ("3", "filing_date", "法律风险", "立案信息") //立案信息
+    )
 
   def main(args: Array[String]): Unit = {
 
+    val (sourceTable, flag, time, kind, project) = valid(args)
+
+    var config = mutable.Map.empty[String, String]
+
+    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
+
+    new CompanyCourtAnnouncement(spark, sourceTable, flag, time, kind, project).calc()
+    spark.stop()
+
+  }
+
+  def valid(args: Array[String]) = {
     if (args.length != 1) {
       println("请输入要计算的table!!!! ")
       sys.exit(-1)
     }
-
     val sourceTable = args(0)
 
-    var config = mutable.Map.empty[String, String]
-    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
-    import spark.implicits._
-    import spark._
-    import org.apache.spark.sql.functions._
-    println(s"company ${this.getClass.getSimpleName} calc start! " + new Date().toString)
-
     val (flag, time, kind, project) = tabMapping.getOrElse(sourceTable, ("", "", "", ""))
     if (flag.isEmpty || time.isEmpty || kind.isEmpty || project.isEmpty) {
       println("输入表不存在!!!   ")
       sys.exit(-1)
     }
+    (sourceTable, flag, time, kind, project)
+  }
+}
+
+case class CompanyCourtAnnouncement(s: SparkSession, sourceTable: String,
+                                    flag: String, time: String, kind: String, project: String
+                                   ) extends LoggingUtils {
+
+  @(transient@getter) val spark: SparkSession = s
+
+  import spark.implicits._
+  import spark._
+  import org.apache.spark.sql.functions._
+
+  def calc(): Unit = {
+    println(s"company ${this.getClass.getSimpleName} calc start! " + new Date().toString)
 
     val df = sql(
       s"""
@@ -64,9 +90,9 @@ object CompanyCourtAnnouncement {
     sql(s"insert overwrite table ${sourceTable}_score  select * from ${sourceTable}_tmp_view")
 
     println(s"company ${this.getClass.getSimpleName} calc end! " + new Date().toString)
-    spark.stop()
   }
 
+
   def trans(r: Row, flag: String, kind: String, prpject: String) = {
     val id = r.getAs[Long]("id")
     val cid = r.getAs[Long]("new_cid").toString

+ 112 - 0
src/main/scala/com/winhc/bigdata/spark/utils/LoggingUtils.scala

@@ -0,0 +1,114 @@
+package com.winhc.bigdata.spark.utils
+
+import java.io.PrintWriter
+
+import org.apache.commons.lang3.StringUtils
+import org.apache.log4j.Logger
+import org.apache.spark.sql.{DataFrame, SparkSession}
+
+import scala.annotation.meta.getter
+
+/**
+ * Utilities mixed into Spark jobs: logs each SQL statement (with a running
+ * sequence number) before executing it, and renders a DataFrame as an
+ * ASCII table string for log output.
+ */
+trait LoggingUtils {
+  protected var sqlNo = 1
+
+  @transient protected[this] val logger: Logger = Logger.getLogger(this.getClass)
+
+  @(transient@getter) protected val spark: SparkSession
+
+  def sql(sqlString: String): DataFrame = {
+    logger.info(
+      s"""
+         |- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+         |Job[${this.getClass.getSimpleName}].SQL[No$sqlNo.]
+         |
+         |$sqlString
+         |- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+       """.stripMargin
+    )
+    sqlNo += 1
+    spark.sql(sqlString)
+  }
+
+  def showString(dataset: DataFrame, _numRows: Int = 20, truncate: Int = -1): String = {
+    val numRows = _numRows.max(0)
+    val takeResult = dataset.take(numRows + 1)
+    val hasMoreData = takeResult.length > numRows
+    val data = takeResult.take(numRows)
+
+    // For array values, replace Seq and Array with square brackets
+    // For cells that are beyond `truncate` characters, replace it with the
+    // first `truncate-3` and "..."
+    val rows: Seq[Seq[String]] = dataset.schema.fieldNames.toSeq +: data.map { row =>
+      row.toSeq.map { cell =>
+        val str = cell match {
+          case null => "null"
+          case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]")
+          case array: Array[_] => array.mkString("[", ", ", "]")
+          case seq: Seq[_] => seq.mkString("[", ", ", "]")
+          case _ => cell.toString
+        }
+        if (truncate > 0 && str.length > truncate) {
+          // do not show ellipses for strings shorter than 4 characters.
+          if (truncate < 4) str.substring(0, truncate)
+          else str.substring(0, truncate - 3) + "..."
+        } else {
+          str
+        }
+      }: Seq[String]
+    }
+
+    val sb = new StringBuilder
+    val numCols = dataset.schema.fieldNames.length
+
+    // Initialise the width of each column to a minimum value of '3'
+    val colWidths = Array.fill(numCols)(3)
+
+    // Compute the width of each column
+    for (row <- rows) {
+      for ((cell, i) <- row.zipWithIndex) {
+        colWidths(i) = math.max(colWidths(i), cell.length)
+      }
+    }
+
+    // Create SeparateLine
+    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()
+
+    // column names
+    rows.head.zipWithIndex.map { case (cell, i) =>
+      if (truncate > 0) {
+        StringUtils.leftPad(cell, colWidths(i))
+      } else {
+        StringUtils.rightPad(cell, colWidths(i))
+      }
+    }.addString(sb, "|", "|", "|\n")
+
+    sb.append(sep)
+
+    // data
+    rows.tail.map {
+      _.zipWithIndex.map { case (cell, i) =>
+        if (truncate > 0) {
+          StringUtils.leftPad(cell.toString, colWidths(i))
+        } else {
+          StringUtils.rightPad(cell.toString, colWidths(i))
+        }
+      }.addString(sb, "|", "|", "|\n")
+    }
+
+    sb.append(sep)
+
+    // For Data that has more than "numRows" records
+    if (hasMoreData) {
+      val rowsString = if (numRows == 1) "row" else "rows"
+      sb.append(s"only showing top $numRows $rowsString\n")
+    }
+
+    sb.toString()
+  }
+
+}