|
@@ -3,7 +3,6 @@ package com.winhc.bigdata.spark.jobs.chance
|
|
|
import com.winhc.bigdata.spark.config.EsConfig
|
|
|
import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
|
|
|
import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, ReflectUtils, SparkUtils}
|
|
|
-import org.apache.commons.lang3.StringUtils
|
|
|
import org.apache.spark.internal.Logging
|
|
|
import org.apache.spark.sql.functions.col
|
|
|
import org.apache.spark.sql.types.{MapType, StringType, StructField, StructType}
|
|
@@ -239,17 +238,52 @@ object ChangeExtract {
|
|
|
|
|
|
|
|
|
// winhc_eci_dev company cid 20200630 legal_entity_id,reg_location,business_scope,reg_status,reg_capital,emails,phones
|
|
|
+
|
|
|
+
|
|
|
+  private val startArgs = Seq( // one Args entry per table; main either runs every entry or selects a single one by tableName
|
|
|
+    Args(tableName = "company_tm", primaryFields = "status_new")
|
|
|
+    , Args(tableName = "company_patent_list", primaryFields = "lprs")
|
|
|
+    , Args(tableName = "company_copyright_works_list", primaryFields = "type")
|
|
|
+    , Args(tableName = "company_copyright_reg_list", primaryFields = "version")
|
|
|
+    , Args(tableName = "company_land_publicity", primaryFields = "title,location,use_for")
|
|
|
+    , Args(tableName = "company_land_announcement", primaryFields = "e_number,project_name")
|
|
|
+    , Args(tableName = "company_bid_list", primaryFields = "title")
|
|
|
+    , Args(tableName = "company_land_transfer", primaryFields = "num,location")
|
|
|
+    , Args(tableName = "company_employment", primaryFields = "source")
|
|
|
+    , Args(tableName = "company_env_punishment", primaryFields = "punish_number")
|
|
|
+    , Args(tableName = "company_icp", primaryFields = "domain")
|
|
|
+    , Args(tableName = "company_punishment_info", primaryFields = "punish_number") // NOTE(review): the removed hard-coded list also compared reg_number here - confirm the drop is intended
|
|
|
+    , Args(tableName = "company_punishment_info_creditchina", primaryFields = "punish_number")
|
|
|
+    , Args(tableName = "bankruptcy_open_case", primaryFields = "case_no") // bankruptcy reorganization; NOTE(review): the removed hard-coded list used "id" as the key for this table, this entry falls back to the default "rowkey" - confirm
|
|
|
+
|
|
|
+    , Args(tableName = "company_certificate", primaryFields = "type")
|
|
|
+    , Args(tableName = "company_abnormal_info", primaryFields = "remove_reason")
|
|
|
+
|
|
|
+    , Args(tableName = "company_own_tax", primaryFields = "tax_balance,tax_category,tax_num")
|
|
|
+
|
|
|
+    , Args(tableName = "company_equity_info", primaryKey = "id", primaryFields = "reg_number", isCopy = false) // only entry that overrides both primaryKey and isCopy
|
|
|
+    //      , Args(tableName = "company_staff", primaryFields = "staff_type")
|
|
|
+
|
|
|
+  )
|
|
|
+
|
|
|
+  private case class Args(project: String = "winhc_eci_dev" // settings for one table run; project is passed to ChangeExtractHandle, and the single-table path in main also uses it as spark.hadoop.odps.project.name
|
|
|
+                          , tableName: String // table name; main matches it against the first command-line argument to pick an entry
|
|
|
+                          , primaryKey: String = "rowkey" // forwarded to ChangeExtractHandle after the table name; presumably the key column name - confirm against ChangeExtractHandle
|
|
|
+                          , primaryFields: String // comma-separated list; main splits it on "," before passing it to ChangeExtractHandle
|
|
|
+                          , isCopy: Boolean = true) // forwarded unchanged to ChangeExtractHandle(...).calc; its meaning is defined there
|
|
|
+
|
|
|
def main(args: Array[String]): Unit = {
|
|
|
- if (args.length >= 5 && args.length <= 6) {
|
|
|
- val Array(project, tableName, rowkey, inc_ds, pf, isCopy) = if (args.length == 6) args else args :+ "true"
|
|
|
+ if (args.length == 2) {
|
|
|
+ val Array(tableName, inc_ds) = args
|
|
|
+
|
|
|
+ val e = startArgs.filter(_.tableName.equals(tableName)).head
|
|
|
val config = EsConfig.getEsConfigMap ++ mutable.Map(
|
|
|
- "spark.hadoop.odps.project.name" -> project,
|
|
|
+ "spark.hadoop.odps.project.name" -> e.project,
|
|
|
"spark.hadoop.odps.spark.local.partition.amt" -> "10"
|
|
|
)
|
|
|
val spark = SparkUtils.InitEnv("ChangeExtract", config)
|
|
|
|
|
|
-
|
|
|
- ChangeExtractHandle(spark, project, tableName, rowkey, inc_ds, pf.split(",")).calc(isCopy.toBoolean)
|
|
|
+ ChangeExtractHandle(spark, e.project, tableName, e.primaryKey, inc_ds, e.primaryFields.split(",")).calc(e.isCopy)
|
|
|
spark.stop()
|
|
|
} else {
|
|
|
val ds = args(0)
|
|
@@ -259,30 +293,32 @@ object ChangeExtract {
|
|
|
"spark.hadoop.odps.spark.local.partition.amt" -> "10"
|
|
|
)
|
|
|
val spark = SparkUtils.InitEnv("ChangeExtract", config)
|
|
|
- val rows =
|
|
|
- """winhc_eci_dev company_tm rowkey 20200717 status_new
|
|
|
- |winhc_eci_dev company_patent_list rowkey 20200717 lprs
|
|
|
- |winhc_eci_dev company_copyright_works_list rowkey 20200717 type
|
|
|
- |winhc_eci_dev company_copyright_reg_list rowkey 20200717 version
|
|
|
- |winhc_eci_dev company_land_publicity rowkey 20200717 title,location,use_for
|
|
|
- |winhc_eci_dev company_land_announcement rowkey 20200717 e_number,project_name
|
|
|
- |winhc_eci_dev company_land_transfer rowkey 20200717 num,location
|
|
|
- |winhc_eci_dev company_land_mortgage rowkey 20200717 land_num,land_aministrative_area
|
|
|
- |winhc_eci_dev company_bid_list rowkey 20200717 title
|
|
|
- |winhc_eci_dev company_punishment_info rowkey 20200717 punish_number,reg_number
|
|
|
- |winhc_eci_dev company_punishment_info_creditchina rowkey 20200717 punish_number
|
|
|
- |winhc_eci_dev company_employment rowkey 20200717 source
|
|
|
- |winhc_eci_dev company_env_punishment rowkey 20200717 punish_number
|
|
|
- |winhc_eci_dev bankruptcy_open_case id 20200717 case_no
|
|
|
- |winhc_eci_dev company_icp rowkey 20200717 domain
|
|
|
- |""".stripMargin.replace("20200717", ds)
|
|
|
- for (r <- rows.split("\r\n")) {
|
|
|
- if (StringUtils.isNotEmpty(r)) {
|
|
|
- val as = r.split(" ")
|
|
|
- val Array(tmp, tableName, rowkey, inc_ds, pf, isCopy) = if (as.length == 6) as else as :+ "true"
|
|
|
- ChangeExtractHandle(spark, project, tableName, rowkey, inc_ds, pf.split(",")).calc(isCopy.toBoolean)
|
|
|
- }
|
|
|
- }
|
|
|
+
|
|
|
+ startArgs.foreach(e => {
|
|
|
+ ChangeExtractHandle(spark, e.project, e.tableName, e.primaryKey, ds, e.primaryFields.split(",")).calc(e.isCopy)
|
|
|
+ })
|
|
|
+
|
|
|
+
|
|
|
+ /* val rows =
|
|
|
+ """winhc_eci_dev company_tm rowkey 20200717 status_new
|
|
|
+ |winhc_eci_dev company_patent_list rowkey 20200717 lprs
|
|
|
+ |winhc_eci_dev company_copyright_works_list rowkey 20200717 type
|
|
|
+ |winhc_eci_dev company_copyright_reg_list rowkey 20200717 version
|
|
|
+ |winhc_eci_dev company_land_publicity rowkey 20200717 title,location,use_for
|
|
|
+ |winhc_eci_dev company_land_announcement rowkey 20200717 e_number,project_name
|
|
|
+ |winhc_eci_dev company_bid_list rowkey 20200717 title
|
|
|
+ |winhc_eci_dev company_land_transfer rowkey 20200717 num,location
|
|
|
+ |winhc_eci_dev company_employment rowkey 20200717 source
|
|
|
+ |winhc_eci_dev company_env_punishment rowkey 20200717 punish_number
|
|
|
+ |winhc_eci_dev company_icp rowkey 20200717 domain
|
|
|
+ |""".stripMargin.replace("20200717", ds)
|
|
|
+ for (r <- rows.split("\r\n")) {
|
|
|
+ if (StringUtils.isNotEmpty(r)) {
|
|
|
+ val as = r.split(" ")
|
|
|
+ val Array(tmp, tableName, rowkey, inc_ds, pf, isCopy) = if (as.length == 6) as else as :+ "true"
|
|
|
+ ChangeExtractHandle(spark, project, tableName, rowkey, inc_ds, pf.split(",")).calc(isCopy.toBoolean)
|
|
|
+ }
|
|
|
+ }*/
|
|
|
spark.stop()
|
|
|
}
|
|
|
}
|