@@ -68,6 +68,17 @@ object CompanyIncCompany2Es {
     , "UPDATE_TIME"
     , "DELETED"
   )
+  val outFields_Human = Seq(
+    "NEW_CID"
+    , "CID"
+    , "ID"
+    , "COMPANY_NAME"
+    , "HUMAN_NAME"
+    , "HID"
+    , "HUMAN_PID"
+    , "STATUS"
+    , "CREATE_TIME"
+  )
 
   case class Company2Es(s: SparkSession, project: String, bizDate: String) extends LoggingUtils with BaseFunc {
     @(transient@getter) val spark: SparkSession = s
@@ -144,6 +155,78 @@ object CompanyIncCompany2Es {
     }
   }
 
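+  // syncs the incremental company-human relation data into the ads table and HBase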
+  case class Company_Human_Relation2HBase(s: SparkSession, project: String, bizDate: String) extends LoggingUtils {
+    @(transient@getter) val spark: SparkSession = s
+
+    def calc() {
+      val partition = bizDate.replaceAll("\\-", "")
+      if (partition.length != 8) {
+        println("biz date is invalid!")
+        sys.exit(-99)
+      }
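+      // incremental window: newest inc_ods partition is the end, last loaded inc_ads partition the start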
+      val inc_ods_partitions = BaseUtil.getPartitions(s"${project}.inc_ods_company_human_relation", spark)
+      val end_partition = if (inc_ods_partitions.isEmpty) partition else inc_ods_partitions.last
+
+      val inc_ads_partitions = BaseUtil.getPartitions(s"${project}.inc_ads_company_human_relation", spark)
+      val start_partition = if (inc_ads_partitions.isEmpty) "0" else inc_ads_partitions.last
+
+      if (start_partition.equals(end_partition)) {
+        println("start_partition == end_partition")
+        sys.exit(-999)
+      }
+
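+      // target columns come from the ads schema, excluding the partition column ds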
+      val companyCols = spark.table("ads_company_human_relation").columns
+        .filter(!_.equals("ds"))
+        .seq
+
+      // read the incremental data
+      // drop duplicates, keeping the latest row per cid/hid/human_pid (by update_time)
+      val df = sql(
+        s"""
+           |SELECT ${companyCols.mkString(",")}
+           |FROM (
+           |    SELECT a.*
+           |           ,row_number() OVER (PARTITION BY a.cid,a.hid,a.human_pid ORDER BY update_time DESC) c
+           |    FROM (
+           |        SELECT *
+           |        FROM $project.inc_ods_company_human_relation
+           |        WHERE ds > $start_partition and ds <= $end_partition and cid is not null
+           |    ) as a
+           |) AS tmp
+           |WHERE tmp.c = 1
+           |""".stripMargin)
+
+      df.cache().createOrReplaceTempView("tmp_company_human_relation_inc")
+
+      // write the increment to the ads table
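+      // local (Windows) runs append instead of overwriting the target partition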
+      sql(
+        s"""
+           |INSERT ${if (BaseUtil.isWindows) "INTO" else "OVERWRITE"} TABLE ${project}.inc_ads_company_human_relation PARTITION(ds='$end_partition')
+           |SELECT ${companyCols.mkString(",")}
+           |FROM
+           |    tmp_company_human_relation_inc
+           |""".stripMargin)
+
+      import spark.implicits._
+      // write to HBase
+      import org.apache.spark.sql.functions.col
+      val jobConf = HBaseConfig.HBaseOutputJobConf("COMPANY_HUMAN_RELATION")
+      val stringDf = df.select(companyCols.map(column => col(column).cast("string")): _*)
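+      // build one HBase Put per row, keyed by rowkey; only non-null outFields_Human columns are written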
+      stringDf.rdd.map(row => {
+        val id = row.getAs[String]("rowkey")
+        val put = new Put(Bytes.toBytes(id))
+        for (f <- outFields_Human) {
+          val v = row.getAs[String](f.toLowerCase)
+          if (v != null) {
+            put.addColumn(BaseConst.F_BYTES, Bytes.toBytes(f), Bytes.toBytes(v))
+          }
+        }
+        (new ImmutableBytesWritable, put)
+      }).filter(_ != null)
+        .saveAsHadoopDataset(jobConf)
+
+    }
+  }
 
   def main(args: Array[String]): Unit = {
     if (args.length != 2) {