|
@@ -93,11 +93,27 @@ object CompanyIncCompany2Es {
|
|
|
.seq
|
|
|
|
|
|
// Read the source data
|
|
|
+ /* val df = sql(
|
|
|
+ s"""
|
|
|
+ |SELECT ${companyCols.mkString(",")}
|
|
|
+ |FROM $project.inc_ods_company
|
|
|
+ |WHERE ds > $start_partition and ds <= $end_partition
|
|
|
+ |""".stripMargin) */
|
|
|
+
|
|
|
+ // Deduplicate the incremental data: keep only the newest row per cid (ordered by update_time DESC)
|
|
|
val df = sql(
|
|
|
s"""
|
|
|
|SELECT ${companyCols.mkString(",")}
|
|
|
- |FROM $project.inc_ods_company
|
|
|
- |WHERE ds > $start_partition and ds <= $end_partition
|
|
|
+ |FROM (
|
|
|
+ | SELECT a.*
|
|
|
+ | ,row_number() OVER (PARTITION BY a.cid ORDER BY update_time DESC) c
|
|
|
+ | FROM (
|
|
|
+ | SELECT *
|
|
|
+ | FROM $project.inc_ods_company
|
|
|
+ | WHERE ds > $start_partition and ds <= $end_partition
|
|
|
+ | ) as a
|
|
|
+ | ) AS tmp
|
|
|
+ |WHERE tmp.c = 1
|
|
|
|""".stripMargin)
|
|
|
|
|
|
df.createOrReplaceTempView("tmp_company_inc")
|
|
@@ -126,7 +142,9 @@ object CompanyIncCompany2Es {
|
|
|
}
|
|
|
}
|
|
|
(new ImmutableBytesWritable, put)
|
|
|
- }).filter(_ != null).saveAsHadoopDataset(jobConf)
|
|
|
+ }).filter(_ != null)
|
|
|
+ .saveAsNewAPIHadoopDataset(jobConf)
|
|
|
+ // .saveAsHadoopDataset(jobConf)
|
|
|
|
|
|
// Write the results out to ES
|
|
|
import com.winhc.bigdata.spark.utils.CompanyEsUtils.getEsDoc
|