|
@@ -65,20 +65,18 @@ case class NgCompanyDynamic(s: SparkSession,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- def calc(): Unit = {
|
|
|
-
|
|
|
- val where = args_map.keys.map(r => s""" "$r" """).mkString("(", ",", ")")
|
|
|
-
|
|
|
- val org_tab = if (inc) "winhc_ng.bds_change_extract" else "winhc_ng.bds_change_extract_all"
|
|
|
- val ds = getLastPartitionsOrElse(org_tab, "0")
|
|
|
+ private lazy val org_tab = if (inc) "winhc_ng.bds_change_extract" else "winhc_ng.bds_change_extract_all"
|
|
|
+ private lazy val ds = getLastPartitionsOrElse(org_tab, "0")
|
|
|
|
|
|
- val rdd: RDD[CompanyDynamicRecord] = sql(
|
|
|
+ private def get_rdd(tn: String): RDD[CompanyDynamicRecord] = {
|
|
|
+ val rdd = sql(
|
|
|
s"""
|
|
|
|SELECT *
|
|
|
|FROM $org_tab
|
|
|
|WHERE ds = $ds
|
|
|
- |AND tn in $where
|
|
|
- |""".stripMargin).rdd.map(r => {
|
|
|
+ |AND tn = '$tn'
|
|
|
+ |""".stripMargin)
|
|
|
+ .rdd.map(r => {
|
|
|
val value = r.getAs[String]("change_fields")
|
|
|
val change_fields: Seq[String] = if (StringUtils.isEmpty(value)) Seq.empty else value.split(",")
|
|
|
ChangeExtract(rowkey = r.getAs("rowkey")
|
|
@@ -102,26 +100,27 @@ case class NgCompanyDynamic(s: SparkSession,
|
|
|
}).flatMap(r => args_map(r.tn).flat_map.apply(r))
|
|
|
.map(_.format())
|
|
|
.filter(_ != null)
|
|
|
- .cache()
|
|
|
|
|
|
- //todo 可将rdd直接落hbase库
|
|
|
+ rdd
|
|
|
+ }
|
|
|
|
|
|
+ def calc(): Unit = {
|
|
|
+ // TODO: the rdd could be written directly to HBase
|
|
|
|
|
|
var rdd_map: mutable.Map[String, RDD[CompanyDynamicRecord]] = mutable.Map.empty
|
|
|
|
|
|
for (elem <- args) {
|
|
|
- var tmp_rdd: RDD[CompanyDynamicRecord] = null
|
|
|
- if (elem.group_by_key == null) {
|
|
|
- tmp_rdd = rdd.filter(r => elem.tn.equals(r.tn))
|
|
|
- } else {
|
|
|
+ var tmp_rdd: RDD[CompanyDynamicRecord] = get_rdd(elem.tn)
|
|
|
+
|
|
|
+ if (elem.group_by_key != null) {
|
|
|
if (elem.group_by_pre == null) {
|
|
|
println(s"elem.tn = ${elem.tn} : groupBy and flatMap !")
|
|
|
- tmp_rdd = rdd.filter(r => elem.tn.equals(r.tn))
|
|
|
+ tmp_rdd = tmp_rdd
|
|
|
.groupBy(r => args_map(elem.tn).group_by_key.apply(r))
|
|
|
.flatMap(r => args_map(elem.tn).group_by_flat_map(r._2.toSeq))
|
|
|
} else {
|
|
|
println(s"elem.tn = ${elem.tn} : flatMap , groupBy and flatMap")
|
|
|
- tmp_rdd = rdd.filter(r => elem.tn.equals(r.tn))
|
|
|
+ tmp_rdd = tmp_rdd
|
|
|
.flatMap(r => args_map(elem.tn).group_by_pre.apply(r))
|
|
|
.filter(_ != null)
|
|
|
.groupBy(r => args_map(elem.tn).group_by_key.apply(r))
|