xufei 5 лет назад
Родитель
Сommit
71bdf5869c

+ 2 - 13
src/main/scala/com/winhc/bigdata/spark/jobs/CompanyIncrForCids.scala

@@ -10,33 +10,22 @@ import scala.collection.mutable
  * π
  */
 
-//reg_num,full_name,cat_num,new_cid
-// winhc_eci_dev.inc_ods_company
-// winhc_eci_dev.ads_company_copyright_reg_list
-// winhc_eci_dev.inc_ods_company_copyright_reg
-// winhc_eci_dev.inc_ads_company_copyright_reg
-// winhc_eci_dev.inc_ads_company_copyright_reg_list
 object CompanyIncrForCids {
 
   //winhc_eci_dev company_copyright_reg company_copyright_reg_list reg_num,full_name,cat_num,new_cid
+  //winhc_eci_dev company_copyright_works company_copyright_works_list reg_num,name,type,new_cid
+  //winhc_eci_dev company_patent company_patent_list app_number,pub_number,title,new_cid
   def main(args: Array[String]): Unit = {
 
     val Array(project,mainTableName,sublistTableName,dupliCols) = args
 
-//    val (cols, t1, t2, t3, t4, t5) = valid(args)
     var config = mutable.Map(
       "spark.hadoop.odps.project.name" -> "winhc_eci_dev",
       "spark.hadoop.odps.spark.local.partition.amt" -> "10"
     )
     val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)
     CompanyIncrForCidsUtils(spark,project,mainTableName,sublistTableName,dupliCols.split(",").seq).calc()
-//    CompanyIncrForCidsUtils(spark, t1, t2, t3, t4, t5, cols).calc()
     spark.stop()
   }
 
-  def valid(args: Array[String]): (Seq[String], String, String, String, String, String) = {
-    println(args.toSeq.mkString(" "))
-    val Array(cols, t1, t2, t3, t4, t5) = args
-    (cols.split(",").toSeq, t1, t2, t3, t4, t5)
-  }
 }

+ 2 - 2
src/main/scala/com/winhc/bigdata/spark/utils/CompanyIncrForCidsUtils.scala

@@ -116,9 +116,9 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
     sql(
       s"""
          |INSERT OVERWRITE TABLE  $target_inc_ads_company_tb PARTITION(ds='$firstDs')
-         |SELECT  ${columns.mkString(",")}
+         |SELECT  cids,${columns.mkString(",")}
          |FROM    (
-         |            SELECT  ${columns.mkString(",")}
+         |            SELECT  cids,${columns.mkString(",")}
          |                    ,ROW_NUMBER() OVER (PARTITION BY id ORDER BY update_time DESC ) num
          |            FROM    ${inc_ods_company_tb}
          |            WHERE   ds >= ${firstDs}