|
@@ -1,6 +1,6 @@
|
|
package com.winhc.bigdata.flink.jobs
|
|
package com.winhc.bigdata.flink.jobs
|
|
|
|
|
|
-import com.winhc.bigdata.flink.event.{ElasticSearchInfo, HbaseInfo, UpdateEntity}
|
|
|
|
|
|
+import com.winhc.bigdata.flink.event.{ElasticSearchInfo, HbaseInfo, HologresInfo, UpdateEntity}
|
|
import com.winhc.bigdata.flink.func.{HbaseAsyncFunction, HbaseSinkFunction}
|
|
import com.winhc.bigdata.flink.func.{HbaseAsyncFunction, HbaseSinkFunction}
|
|
import com.winhc.bigdata.flink.implicits._
|
|
import com.winhc.bigdata.flink.implicits._
|
|
import com.winhc.bigdata.flink.java.constant.EnvConst
|
|
import com.winhc.bigdata.flink.java.constant.EnvConst
|
|
@@ -12,10 +12,9 @@ import org.apache.commons.lang3.StringUtils
|
|
import org.apache.flink.api.common.eventtime.WatermarkStrategy
|
|
import org.apache.flink.api.common.eventtime.WatermarkStrategy
|
|
import org.apache.flink.api.common.functions.AggregateFunction
|
|
import org.apache.flink.api.common.functions.AggregateFunction
|
|
import org.apache.flink.api.scala._
|
|
import org.apache.flink.api.scala._
|
|
-import org.apache.flink.configuration.Configuration
|
|
|
|
import org.apache.flink.connector.kafka.source.KafkaSource
|
|
import org.apache.flink.connector.kafka.source.KafkaSource
|
|
import org.apache.flink.streaming.api.functions.ProcessFunction
|
|
import org.apache.flink.streaming.api.functions.ProcessFunction
|
|
-import org.apache.flink.streaming.api.scala.{AsyncDataStream, StreamExecutionEnvironment}
|
|
|
|
|
|
+import org.apache.flink.streaming.api.scala.{AsyncDataStream, DataStream, StreamExecutionEnvironment}
|
|
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
|
|
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
|
|
import org.apache.flink.streaming.api.windowing.time.Time
|
|
import org.apache.flink.streaming.api.windowing.time.Time
|
|
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
|
|
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
|
|
@@ -25,8 +24,8 @@ import org.json4s.JsonAST.{JNull, JObject, JValue}
|
|
import org.json4s.{JNothing, JString}
|
|
import org.json4s.{JNothing, JString}
|
|
import java.nio.charset.StandardCharsets
|
|
import java.nio.charset.StandardCharsets
|
|
import java.util.concurrent.TimeUnit
|
|
import java.util.concurrent.TimeUnit
|
|
-
|
|
|
|
import com.alibaba.dcm.DnsCacheManipulator
|
|
import com.alibaba.dcm.DnsCacheManipulator
|
|
|
|
+import scala.collection.immutable
|
|
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -38,23 +37,25 @@ object TestJob1 {
|
|
DnsCacheManipulator.loadDnsCacheConfig()
|
|
DnsCacheManipulator.loadDnsCacheConfig()
|
|
|
|
|
|
val env = StreamExecutionEnvironment.getExecutionEnvironment
|
|
val env = StreamExecutionEnvironment.getExecutionEnvironment
|
|
|
|
+ //env.getCheckpointConfig.setCheckpointTimeout(60000*5)
|
|
|
|
|
|
env.getConfig.setGlobalJobParameters(EnvConst.createParameterTool(args))
|
|
env.getConfig.setGlobalJobParameters(EnvConst.createParameterTool(args))
|
|
val kafkaSource: KafkaSource[String] = KafkaSourceBuilder.buildSourceFunction("flink_test")
|
|
val kafkaSource: KafkaSource[String] = KafkaSourceBuilder.buildSourceFunction("flink_test")
|
|
|
|
|
|
val source = env.fromSource(kafkaSource, WatermarkStrategy.forMonotonousTimestamps(), "Kafka Source")
|
|
val source = env.fromSource(kafkaSource, WatermarkStrategy.forMonotonousTimestamps(), "Kafka Source")
|
|
|
|
|
|
- val allSource = source.transform_and_validation()
|
|
|
|
|
|
+ val allSource: DataStream[UpdateEntity] = source.transform_and_validation().name("transform_and_validation")
|
|
allSource.getSideOutput(OutputTags.ExceptionTag).map(e => e.toJson()).print()
|
|
allSource.getSideOutput(OutputTags.ExceptionTag).map(e => e.toJson()).print()
|
|
allSource.getSideOutput(OutputTags.TransformErrorTag).map(e => e.toJson()).print()
|
|
allSource.getSideOutput(OutputTags.TransformErrorTag).map(e => e.toJson()).print()
|
|
allSource.getSideOutput(OutputTags.ValidationErrorTag).map(e => e.toJson()).print()
|
|
allSource.getSideOutput(OutputTags.ValidationErrorTag).map(e => e.toJson()).print()
|
|
- val asyncDataStream = AsyncDataStream.unorderedWait(allSource, new HbaseAsyncFunction, 10, TimeUnit.SECONDS)
|
|
|
|
|
|
+ //val asyncDataStream = AsyncDataStream.unorderedWait(allSource, new HbaseAsyncFunction, 10, TimeUnit.SECONDS)
|
|
|
|
|
|
- val afterOutput = asyncDataStream.process(new ProcessFunction[UpdateEntity, (String, Map[String, JObject])] {
|
|
|
|
|
|
+ val afterOutput: DataStream[(String, Map[String, JObject])] = allSource.process(new ProcessFunction[UpdateEntity, (String, Map[String, JObject])] {
|
|
override def processElement(ue: UpdateEntity, ctx: ProcessFunction[UpdateEntity, (String, Map[String, JObject])]#Context, out: Collector[(String, Map[String, JObject])]): Unit = {
|
|
override def processElement(ue: UpdateEntity, ctx: ProcessFunction[UpdateEntity, (String, Map[String, JObject])]#Context, out: Collector[(String, Map[String, JObject])]): Unit = {
|
|
val company = ue.company
|
|
val company = ue.company
|
|
if (company != null) {
|
|
if (company != null) {
|
|
ctx.output(OutputTags.HBASE_SINK_TAG, ("NG_RT_COMPANY", "F", ue.companyId(), company))
|
|
ctx.output(OutputTags.HBASE_SINK_TAG, ("NG_RT_COMPANY", "F", ue.companyId(), company))
|
|
|
|
+ ctx.output(OutputTags.HOLO_SINK_TAG, ("ng_rt_company", "public", ue.companyId(), company))
|
|
}
|
|
}
|
|
for (table <- ue.dims.values) {
|
|
for (table <- ue.dims.values) {
|
|
val meta = table.metaInfo
|
|
val meta = table.metaInfo
|
|
@@ -78,9 +79,18 @@ object TestJob1 {
|
|
})
|
|
})
|
|
case _ =>
|
|
case _ =>
|
|
}*/
|
|
}*/
|
|
|
|
+ meta.holo match {
|
|
|
|
+ case Some(hi) =>
|
|
|
|
+ val HologresInfo(h0, h1) = hi
|
|
|
|
+ data.foreach(t => {
|
|
|
|
+ val (rowkey, jo) = t
|
|
|
|
+ ctx.output(OutputTags.HOLO_SINK_TAG, (h0, h1, rowkey, jo))
|
|
|
|
+ })
|
|
|
|
+ case _ =>
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- })
|
|
|
|
|
|
+ }).name("afterOutput")
|
|
val hbaseSource = afterOutput.getSideOutput(OutputTags.HBASE_SINK_TAG)
|
|
val hbaseSource = afterOutput.getSideOutput(OutputTags.HBASE_SINK_TAG)
|
|
hbaseSource
|
|
hbaseSource
|
|
.map(t => {
|
|
.map(t => {
|
|
@@ -106,7 +116,7 @@ object TestJob1 {
|
|
(table, sp)
|
|
(table, sp)
|
|
case _ => throw new RuntimeException("流数据错误")
|
|
case _ => throw new RuntimeException("流数据错误")
|
|
}
|
|
}
|
|
- })
|
|
|
|
|
|
+ }).name("hbaseMap")
|
|
.keyBy(t => {
|
|
.keyBy(t => {
|
|
val row = new String(t._2.getRowKey, StandardCharsets.UTF_8)
|
|
val row = new String(t._2.getRowKey, StandardCharsets.UTF_8)
|
|
s"${t._1}${StringUtils.left(row, 2)}"
|
|
s"${t._1}${StringUtils.left(row, 2)}"
|
|
@@ -133,7 +143,29 @@ object TestJob1 {
|
|
}
|
|
}
|
|
})
|
|
})
|
|
|
|
|
|
- .addSink(new HbaseSinkFunction)
|
|
|
|
|
|
+ .addSink(new HbaseSinkFunction).name("HbaseSinkFunction")
|
|
|
|
+
|
|
|
|
+// val holoSource: DataStream[(String, String, String, JObject)] = afterOutput.getSideOutput(OutputTags.HOLO_SINK_TAG)
|
|
|
|
+// holoSource.map(t => {
|
|
|
|
+// val (table, pb, row, value) = t
|
|
|
|
+//
|
|
|
|
+// value match {
|
|
|
|
+// case JObject(x) =>
|
|
|
|
+// val x1: immutable.Seq[(String, JValue)] = x
|
|
|
|
+// x.foreach(t => {
|
|
|
|
+// val (k, v) = t
|
|
|
|
+// val stringv = v match {
|
|
|
|
+// case JNull => ""
|
|
|
|
+// case JNothing => ""
|
|
|
|
+// case x: JObject => x.toJson()
|
|
|
|
+// case JString(x) => x
|
|
|
|
+// case x: JValue => x.toString
|
|
|
|
+// }
|
|
|
|
+// })
|
|
|
|
+// (table, sp)
|
|
|
|
+// case _ => throw new RuntimeException("流数据错误")
|
|
|
|
+// }
|
|
|
|
+// })
|
|
|
|
|
|
env.execute("flink start")
|
|
env.execute("flink start")
|
|
}
|
|
}
|