@@ -2,7 +2,7 @@ package com.winhc.bigdata.spark.jobs
 
 import com.winhc.bigdata.spark.config.EsConfig
 import com.winhc.bigdata.spark.udf.{BaseFunc, CompanyStaffAggs}
-import com.winhc.bigdata.spark.utils.{LoggingUtils, SparkUtils}
+import com.winhc.bigdata.spark.utils.{LoggingUtils, MaxComputer2Phoenix, SparkUtils}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
 
@@ -16,7 +16,6 @@ import scala.collection.mutable
   */
 object company_staff {
 
-
   case class CompanyStaffUtil(s: SparkSession,
                               project: String
                              ) extends LoggingUtils with Logging with BaseFunc {
@@ -26,43 +25,43 @@ object company_staff {
     def init(): Unit = {
       cleanup()
       spark.udf.register("agg_val", new CompanyStaffAggs())
-      sql(
-        """
-          |CREATE TABLE IF NOT EXISTS winhc_eci_dev.ads_company_staff
-          |(
-          |    rowkey STRING COMMENT 'FIELD'
-          |    ,new_cid STRING COMMENT 'FIELD'
-          |    ,id BIGINT COMMENT ''
-          |    ,cid BIGINT COMMENT ''
-          |    ,hid BIGINT COMMENT ''
-          |    ,staff_type STRING COMMENT ''
-          |    ,create_time DATETIME COMMENT ''
-          |    ,update_time DATETIME COMMENT ''
-          |    ,deleted BIGINT COMMENT ''
-          |)
-          |COMMENT 'TABLE COMMENT'
-          |PARTITIONED BY
-          |(
-          |    ds STRING COMMENT '分区'
-          |)
-          |""".stripMargin)
+      /* sql(
+        """
+          |CREATE TABLE IF NOT EXISTS winhc_eci_dev.ads_company_staff
+          |(
+          |    rowkey STRING COMMENT 'FIELD'
+          |    ,new_cid STRING COMMENT 'FIELD'
+          |    ,id BIGINT COMMENT ''
+          |    ,cid BIGINT COMMENT ''
+          |    ,hid BIGINT COMMENT ''
+          |    ,staff_type STRING COMMENT ''
+          |    ,create_time DATETIME COMMENT ''
+          |    ,update_time DATETIME COMMENT ''
+          |    ,deleted BIGINT COMMENT ''
+          |)
+          |COMMENT 'TABLE COMMENT'
+          |PARTITIONED BY
+          |(
+          |    ds STRING COMMENT '分区'
+          |)
+          |""".stripMargin)*/
 
-      sql(
-        s"""
-           |CREATE TABLE IF NOT EXISTS `winhc_eci_dev`.`inc_ads_company_staff` (
-           |  `rowkey` STRING COMMENT 'FIELD',
-           |  `new_cid` STRING COMMENT 'FIELD',
-           |  `id` BIGINT,
-           |  `cid` BIGINT,
-           |  `hid` BIGINT,
-           |  `staff_type` STRING,
-           |  `create_time` DATETIME,
-           |  `update_time` DATETIME,
-           |  `deleted` BIGINT)
-           | COMMENT 'TABLE COMMENT'
-           |PARTITIONED BY (
-           |  `ds` STRING COMMENT '分区')
-           |""".stripMargin)
+      /* sql(
+        s"""
+           |CREATE TABLE IF NOT EXISTS `winhc_eci_dev`.`inc_ads_company_staff` (
+           |  `rowkey` STRING COMMENT 'FIELD',
+           |  `new_cid` STRING COMMENT 'FIELD',
+           |  `id` BIGINT,
+           |  `cid` BIGINT,
+           |  `hid` BIGINT,
+           |  `staff_type` STRING,
+           |  `create_time` DATETIME,
+           |  `update_time` DATETIME,
+           |  `deleted` BIGINT)
+           | COMMENT 'TABLE COMMENT'
+           |PARTITIONED BY (
+           |  `ds` STRING COMMENT '分区')
+           |""".stripMargin)*/
     }
 
 
@@ -205,6 +204,7 @@ object company_staff {
          |        SELECT *
          |        FROM winhc_eci_dev.inc_ods_company_staff
          |        WHERE ds = '$ds'
+         |        AND cid is not null
          |        ) AS ta1
          |        LEFT JOIN mapping AS ta2
          |        ON ta1.cid = ta2.cid
@@ -217,27 +217,82 @@ object company_staff {
       sql(
         s"""
            |INSERT OVERWRITE TABLE winhc_eci_dev.inc_ads_company_staff PARTITION(ds=$ds)
-           |SELECT t2.rowkey
-           |       ,t2.new_cid
-           |       ,t2.id
-           |       ,t2.cid
-           |       ,t2.hid
-           |       ,t3.types AS staff_type
-           |       ,t2.create_time
-           |       ,t2.update_time
-           |       ,t2.deleted
-           |FROM inc_ads_tmp AS t2
-           |LEFT JOIN (
-           |    SELECT t1.new_cid
-           |           ,t1.hid
-           |           ,agg_val(t1.staff_type) AS types
-           |    FROM inc_ads_tmp AS t1
-           |    GROUP BY t1.new_cid
-           |             ,t1.hid
-           |    ) AS t3
-           |ON CONCAT_WS('',t2.hid,t2.new_cid) = CONCAT_WS('',t3.hid,t3.new_cid)
+           |SELECT t5.rowkey
+           |       ,t5.new_cid
+           |       ,t5.id
+           |       ,t5.cid
+           |       ,t5.hid
+           |       ,t5.staff_type
+           |       ,t5.create_time
+           |       ,t5.update_time
+           |       ,t5.deleted
+           |FROM (
+           |    SELECT t4.*
+           |           ,ROW_NUMBER() OVER (PARTITION BY t4.rowkey ORDER BY t4.rowkey DESC ) num
+           |    FROM (
+           |        SELECT t2.rowkey
+           |               ,t2.new_cid
+           |               ,t2.id
+           |               ,t2.cid
+           |               ,t2.hid
+           |               ,t3.types AS staff_type
+           |               ,t2.create_time
+           |               ,t2.update_time
+           |               ,t2.deleted
+           |        FROM inc_ads_tmp AS t2
+           |        LEFT JOIN (
+           |            SELECT t1.new_cid
+           |                   ,t1.hid
+           |                   ,agg_val(t1.staff_type) AS types
+           |            FROM inc_ads_tmp AS t1
+           |            GROUP BY t1.new_cid
+           |                     ,t1.hid
+           |            ) AS t3
+           |        ON CONCAT_WS('',t2.hid,t2.new_cid) = CONCAT_WS('',t3.hid,t3.new_cid)
+           |        ) AS t4
+           |    ) AS t5
+           |WHERE t5.num = 1
            |""".stripMargin)
 
+
+      MaxComputer2Phoenix(spark, Seq("new_cid"
+        , "hid"
+        , "staff_type"
+        , "create_time"
+        , "update_time"
+        , "deleted"), "winhc_eci_dev.ads_company_staff", "COMPANY_STAFF", ds, "rowkey").syn()
+    }
+
+    def inc_bulk_save(startDs: String): Unit = {
+      import com.winhc.bigdata.spark.implicits.PhoenixHelper._
+      sql(
+        s"""
+           |SELECT tmp.rowkey
+           |       ,tmp.new_cid as cid
+           |       ,tmp.hid
+           |       ,tmp.staff_type
+           |       ,tmp.create_time
+           |       ,tmp.update_time
+           |       ,tmp.deleted
+           |FROM (
+           |    SELECT *
+           |           ,ROW_NUMBER() OVER(PARTITION BY rowkey ORDER BY ds DESC ) AS c
+           |    FROM (
+           |        SELECT *
+           |        FROM winhc_eci_dev.inc_ads_company_staff
+           |        WHERE ds >= $startDs
+           |        AND new_cid is not null
+           |        )
+           |    ) AS tmp
+           |WHERE tmp.c = 1
+           |""".stripMargin).repartition(400)
+        .save2PhoenixByJDBC(s"COMPANY_STAFF", Seq("rowkey"
+          , "cid"
+          , "hid"
+          , "staff_type"
+          , "create_time"
+          , "update_time"
+          , "deleted"))
     }
   }
 
@@ -252,12 +307,16 @@ object company_staff {
 
     val e = CompanyStaffUtil(spark, project)
     e.init()
+
+    // e.inc_bulk_save("20200603")
+
     if (args.length == 1) {
       val Array(ds) = args
       e.inc(ds)
     } else {
       e.inc()
     }
+
     spark.stop()
   }
 }