|
@@ -0,0 +1,278 @@
|
|
|
+package com.winhc.bigdata.spark.ng.dynamic
|
|
|
+
|
|
|
+import com.winhc.bigdata.spark.ng.dynamic.utils.BusinessTotalDynamicUtils
|
|
|
+import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
|
|
|
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
|
|
|
+import org.apache.spark.internal.Logging
|
|
|
+import org.apache.spark.rdd.RDD
|
|
|
+import org.apache.spark.sql.{DataFrame, SparkSession}
|
|
|
+import org.apache.spark.sql.functions.col
|
|
|
+
|
|
|
+import scala.annotation.meta.getter
|
|
|
+import scala.collection.mutable
|
|
|
+
|
|
|
/**
 * Derives company "business change" dynamic records and writes them to a partition of
 * `winhc_ng.out_company_dynamic_all`.
 *
 * The input rows come from the SQL returned by `generateAllTabSql("company_change", "winhc_ng")`,
 * restricted to rows whose change time and before/after contents are non-null and whose
 * before/after contents differ. Each public method selects one kind of change by delegating
 * to the matching `*Filter` / `*Transform` pair in [[BusinessTotalDynamicUtils]].
 *
 * @param s the Spark session used for every query issued by this job
 * @author: XuJiakai
 * @date: 2021/8/5 10:29
 */
case class BusinessTotalDynamic(s: SparkSession) extends LoggingUtils with Logging {
  @(transient@getter) val spark: SparkSession = s

  /** Table that receives the generated records. */
  private val target_tab = "winhc_ng.out_company_dynamic_all"

  /** Predicate over (category, change_item, change_info, content_before, content_after). */
  private type ChangeFilter = (String, String, String, String, String) => Boolean

  /**
   * Expansion over (company_id, company_name, category, change_item, change_info,
   * content_before, content_after, change_time) into zero or more output records.
   */
  private type ChangeTransform =
    (String, String, String, String, String, String, String, String) => TraversableOnce[CompanyDynamicRecord]

  /**
   * Runs the base change query and casts every column to string, so that all fields can be
   * read uniformly with `getAs[String]`.
   *
   * Note: this is a `def`, so every caller issues the query again.
   */
  private def getRdd: DataFrame = {
    val df = sql(
      s"""
         |${generateAllTabSql("company_change", "winhc_ng")._1}
         |AND change_time is not null
         |AND content_before is not null
         |AND content_after is not null
         |AND content_after <> content_before
         |""".stripMargin)
    df.select(df.columns.map(column => col(column).cast("string")): _*)
  }

  /**
   * Shared pipeline behind every public change-type method: keep the rows accepted by `keep`,
   * then expand each remaining row with `expand`.
   *
   * The two closures below only capture the `keep` / `expand` arguments (never `this`), which
   * keeps the serialized task closures as small as the hand-written originals.
   *
   * @param keep   row predicate, fed the five change-description columns
   * @param expand row expansion, fed the eight columns read from each surviving row
   * @return the records produced by `expand` for all rows accepted by `keep`
   */
  private def changeRecords(keep: ChangeFilter, expand: ChangeTransform): RDD[CompanyDynamicRecord] =
    getRdd.rdd
      .filter { row =>
        keep(
          row.getAs[String]("category"),
          row.getAs[String]("change_item"),
          row.getAs[String]("change_info"),
          row.getAs[String]("content_before"),
          row.getAs[String]("content_after"))
      }
      .flatMap { row =>
        expand(
          row.getAs[String]("company_id"),
          row.getAs[String]("company_name"),
          row.getAs[String]("category"),
          row.getAs[String]("change_item"),
          row.getAs[String]("change_info"),
          row.getAs[String]("content_before"),
          row.getAs[String]("content_after"),
          row.getAs[String]("change_time"))
      }

  /** Records selected by `companyNameFilter` and built by `companyNameTransform`. */
  def companyName(): RDD[CompanyDynamicRecord] =
    changeRecords(
      (category, item, info, before, after) =>
        BusinessTotalDynamicUtils.companyNameFilter(category, item, info, before, after),
      (id, name, category, item, info, before, after, time) =>
        BusinessTotalDynamicUtils.companyNameTransform(id, name, category, item, info, before, after, time))

  /** Records selected by `registeredCapitalFilter` and built by `registeredCapitalTransform`. */
  def registeredCapital(): RDD[CompanyDynamicRecord] =
    changeRecords(
      (category, item, info, before, after) =>
        BusinessTotalDynamicUtils.registeredCapitalFilter(category, item, info, before, after),
      (id, name, category, item, info, before, after, time) =>
        BusinessTotalDynamicUtils.registeredCapitalTransform(id, name, category, item, info, before, after, time))

  /** Records selected by `registeredAddressFilter` and built by `registeredAddressTransform`. */
  def registeredAddress(): RDD[CompanyDynamicRecord] =
    changeRecords(
      (category, item, info, before, after) =>
        BusinessTotalDynamicUtils.registeredAddressFilter(category, item, info, before, after),
      (id, name, category, item, info, before, after, time) =>
        BusinessTotalDynamicUtils.registeredAddressTransform(id, name, category, item, info, before, after, time))

  /** Records selected by `businessScopeFilter` and built by `businessScopeTransform`. */
  def businessScope(): RDD[CompanyDynamicRecord] =
    changeRecords(
      (category, item, info, before, after) =>
        BusinessTotalDynamicUtils.businessScopeFilter(category, item, info, before, after),
      (id, name, category, item, info, before, after, time) =>
        BusinessTotalDynamicUtils.businessScopeTransform(id, name, category, item, info, before, after, time))

  /** Records selected by `legalRepresentativeFilter` and built by `legalRepresentativeTransform`. */
  def legalRepresentative(): RDD[CompanyDynamicRecord] =
    changeRecords(
      (category, item, info, before, after) =>
        BusinessTotalDynamicUtils.legalRepresentativeFilter(category, item, info, before, after),
      (id, name, category, item, info, before, after, time) =>
        BusinessTotalDynamicUtils.legalRepresentativeTransform(id, name, category, item, info, before, after, time))

  /** Records selected by `holderFilter` and built by `holderTransform`. */
  def holder(): RDD[CompanyDynamicRecord] =
    changeRecords(
      (category, item, info, before, after) =>
        BusinessTotalDynamicUtils.holderFilter(category, item, info, before, after),
      (id, name, category, item, info, before, after, time) =>
        BusinessTotalDynamicUtils.holderTransform(id, name, category, item, info, before, after, time))

  /** Records selected by `staffFilter` and built by `staffTransform`. */
  def staff(): RDD[CompanyDynamicRecord] =
    changeRecords(
      (category, item, info, before, after) =>
        BusinessTotalDynamicUtils.staffFilter(category, item, info, before, after),
      (id, name, category, item, info, before, after, time) =>
        BusinessTotalDynamicUtils.staffTransform(id, name, category, item, info, before, after, time))

  /**
   * Unions the per-change-type records and writes them into the
   * `ds='<yesterday>_business'` partition of the target table.
   *
   * The partition is overwritten, except when `isWindows` is true, in which case rows are
   * appended with `INSERT INTO`.
   */
  def calc(): Unit = {
    // NOTE(review): holder() and staff() are defined above but are not part of this union;
    // the original source only kept them in commented-out alternatives. Confirm that is intended.
    val records = companyName()
      .union(registeredCapital())
      .union(registeredAddress())
      .union(businessScope())
      .union(legalRepresentative())

    // createOrReplaceTempView (rather than createTempView) so that running calc() more than
    // once in the same session does not fail because the view name is already taken.
    spark.createDataFrame(records.map(_.to_row()), spark.table(target_tab).schema)
      .createOrReplaceTempView("company_dynamic_out_tab")

    // Every target column except the partition column, in table order.
    val cols = getColumns(target_tab).diff(Seq("ds")).mkString(",")

    sql(
      s"""
         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $target_tab PARTITION(ds='${BaseUtil.getYesterday()}_business')
         |SELECT $cols
         |FROM company_dynamic_out_tab
         |""".stripMargin)
  }
}
|
|
|
+
|
|
|
object BusinessTotalDynamic {

  /**
   * Job entry point: initialises a Spark session for the `winhc_ng` project, runs
   * [[BusinessTotalDynamic.calc]] and always stops the session afterwards.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val config = mutable.Map(
      "spark.hadoop.odps.project.name" -> "winhc_ng",
      "spark.debug.maxToStringFields" -> "200",
      "spark.hadoop.odps.spark.local.partition.amt" -> "100000"
    )
    val spark = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
    try {
      BusinessTotalDynamic(spark).calc()
    } finally {
      // Release the session even when the job fails, so a failed run does not leave it open.
      spark.stop()
    }
  }
}
|