@@ -0,0 +1,62 @@
+package com.winhc.bigdata.spark.jobs
+
+import com.winhc.bigdata.spark.utils.{CompanyIncrForCidUtils, CompanyIncrForCidsUtils, SparkUtils}
+import org.apache.spark.sql.SparkSession
+
+import scala.collection.mutable
+
+/**
+ * @Description: Entry class for incremental data
+ * @author π
+ * @date 2020/6/28 10:43
+ */
+object CalcIncrTotal {
+  // Example arguments for tables keyed by a single company id (flag = cid):
+  //winhc_eci_dev company_icp liscense,domain,new_cid cid
+  //winhc_eci_dev company_app_info icon_oss_path,brief,name,new_cid cid
+  //winhc_eci_dev ads_company_tm app_date,tm_name,reg_no,new_cid cid
+  //winhc_eci_dev company_wechat title,public_num,new_cid cid
+
+  // Example arguments for tables keyed by multiple company ids (flag = cids):
+  //winhc_eci_dev company_copyright_reg reg_num,full_name,cat_num,new_cid cids
+  //winhc_eci_dev company_copyright_works reg_num,name,type,new_cid cids
+  //winhc_eci_dev company_patent app_number,pub_number,title,new_cid cids
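+
+  // A hypothetical submit command for this entry point (the jar name is assumed, not from the source):
+  //   spark-submit --class com.winhc.bigdata.spark.jobs.CalcIncrTotal <job-jar> \
+  //     winhc_eci_dev company_icp liscense,domain,new_cid cid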
+  def main(args: Array[String]): Unit = {
+
+    // Check the argument count before destructuring: a wrong count would
+    // otherwise fail the pattern match below with a MatchError.
+    if (args.length != 4) {
+      println("Please provide 4 args: project (ODPS project), tableName (table name), dupliCols (dedup columns), flag (cid | cids)")
+      sys.exit(-1)
+    }
+    val Array(project, tableName, dupliCols, flag) = args
+    println(
+      s"""
+         |project: $project
+         |tableName: $tableName
+         |dupliCols: $dupliCols
+         |flag: $flag
+         |""".stripMargin)
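+    // ODPS settings: the MaxCompute project that tables resolve against, plus a
+    // local-mode knob that presumably caps the partitions read when run locally.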
+    val config = mutable.Map(
+      "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
+      "spark.hadoop.odps.spark.local.partition.amt" -> "10"
+    )
+    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
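+    // Dispatch on the id layout: "cid" tables carry a single company id, while
+    // "cids" tables also get a companion <tableName>_list table (see the cids branch).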
+    flag match {
+      case "cid" => CompanyIncrForCidUtils(spark, project, tableName, dupliCols.split(",").toSeq).calc()
+      case "cids" => CompanyIncrForCidsUtils(spark, project, tableName, tableName + "_list", dupliCols.split(",").toSeq).calc()
+      case _ =>
+        println(s"unknown flag: $flag (expected cid or cids)")
+        sys.exit(-1)
+    }
+    spark.stop()
+  }
+}