|
@@ -8,11 +8,13 @@ import com.helospark.lightdi.LightDiContext;
|
|
|
import com.helospark.lightdi.annotation.Autowired;
|
|
|
import com.helospark.lightdi.annotation.Service;
|
|
|
import com.winhc.dataworks.flow.touch.bean.*;
|
|
|
+import com.winhc.dataworks.flow.touch.configuration.DataWorksAccessProperties;
|
|
|
import com.winhc.dataworks.flow.touch.configuration.SchemaInit;
|
|
|
import com.winhc.dataworks.flow.touch.service.OdpsService;
|
|
|
import com.winhc.dataworks.flow.touch.service.TouchService;
|
|
|
import com.winhc.dataworks.flow.touch.utils.DateUtils;
|
|
|
import com.winhc.dataworks.flow.touch.utils.DingUtils;
|
|
|
+import com.winhc.dataworks.flow.touch.utils.SparkDaemonThread;
|
|
|
import lombok.SneakyThrows;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.commons.cli.*;
|
|
@@ -33,6 +35,10 @@ import java.util.stream.Collectors;
|
|
|
public class Main {
|
|
|
@Autowired
|
|
|
private TouchService touchService;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private DataWorksAccessProperties dataWorksAccessProperties;
|
|
|
+
|
|
|
private static final Options options = new Options();
|
|
|
|
|
|
|
|
@@ -51,6 +57,8 @@ public class Main {
|
|
|
|
|
|
options.addOption("flow", "flow", true, "单任务必填,业务流程名");
|
|
|
options.addOption("task", "taskName", true, "单任务必填,任务名");
|
|
|
+ options.addOption("odps", "odps_home", true, "odps cmd 根目录");
|
|
|
+
|
|
|
}
|
|
|
|
|
|
private static void verify(CommandLine commandLine) {
|
|
@@ -58,6 +66,9 @@ public class Main {
|
|
|
if (!commandLine.hasOption("f")) {
|
|
|
throw new RuntimeException();
|
|
|
}
|
|
|
+ if (!commandLine.hasOption("odps")) {
|
|
|
+ throw new RuntimeException();
|
|
|
+ }
|
|
|
if (commandLine.hasOption("s")) {
|
|
|
if (!commandLine.hasOption("flow") || !commandLine.hasOption("task")) {
|
|
|
throw new RuntimeException();
|
|
@@ -127,8 +138,9 @@ public class Main {
|
|
|
if (commandLine.hasOption("d")) {
|
|
|
log.info("debug模式:{}", jobs);
|
|
|
} else {
|
|
|
+ String odpsCmdPath = commandLine.getOptionValue("odps");
|
|
|
dd.send(msg);
|
|
|
- bean.start(bizDate, jobs);
|
|
|
+ bean.start(bizDate, jobs, odpsCmdPath);
|
|
|
}
|
|
|
}
|
|
|
} catch (ParseException e) {
|
|
@@ -146,7 +158,52 @@ public class Main {
|
|
|
}
|
|
|
|
|
|
@SneakyThrows
|
|
|
- private void start(String bizDate, List<DataWorksFlowJob> jobs) {
|
|
|
+ private void start(String bizDate, List<DataWorksFlowJob> jobs, String odpsCmdHome) { // Starts one SparkDaemonThread per project, runs the jobs, retries failed tasks up to three times, and exits the JVM with -1 if failures remain.
|
|
|
+ // Start one daemon thread for each distinct project space referenced by the jobs.
|
|
|
+ String accessKeyId = dataWorksAccessProperties.getAccessKeyId();
|
|
|
+ String accessKeySecret = dataWorksAccessProperties.getAccessKeySecret();
|
|
|
+ Set<String> ps = jobs.stream().map(DataWorksFlowJob::getProject).collect(Collectors.toSet());
|
|
|
+ for (String p : ps) {
|
|
|
+ new SparkDaemonThread(p, accessKeyId, accessKeySecret, odpsCmdHome, 90L).start(); // NOTE(review): the meaning of 90L is not visible here (presumably an interval or timeout) - confirm in SparkDaemonThread
|
|
|
+ }
|
|
|
+
|
|
|
+ // Run the jobs and collect the tasks that failed; failed tasks are retried at most three times.
|
|
|
+ Set<TaskInfo> failureTask = run(bizDate, jobs);
|
|
|
+ int i = 3; // remaining retry attempts
|
|
|
+ while (!failureTask.isEmpty() && i-- > 0) {
|
|
|
+ Set<String> fSet = failureTask.stream().map(TaskInfo::getKey).collect(Collectors.toSet()); // keys of the failed tasks; the filter below assumes getKey() has the form "project:flow:taskName" - TODO confirm
|
|
|
+
|
|
|
+ List<DataWorksFlowJob> js = jobs.stream().map(job -> { // rebuild each original job keeping only its failed tasks
|
|
|
+ String project = job.getProject();
|
|
|
+ String flow = job.getFlow();
|
|
|
+ List<DataWorksFlowTask> task = job.getTask();
|
|
|
+
|
|
|
+ List<DataWorksFlowTask> collect = task.stream().filter(t -> fSet.contains(project + ":" + flow + ":" + t.getTaskName())
|
|
|
+ ).collect(Collectors.toList());
|
|
|
+
|
|
|
+ if (collect.isEmpty()) {
|
|
|
+ return null; // no failed task in this job; removed by the nonNull filter below
|
|
|
+ } else {
|
|
|
+ return new DataWorksFlowJob(project, flow, collect);
|
|
|
+ }
|
|
|
+ }).filter(Objects::nonNull).collect(Collectors.toList());
|
|
|
+
|
|
|
+ String collect = js.stream().flatMap(job -> { // comma-separated "project:flow:taskName" list used in the retry notification
|
|
|
+ String project = job.getProject();
|
|
|
+ String flow = job.getFlow();
|
|
|
+ List<DataWorksFlowTask> task = job.getTask();
|
|
|
+ return task.stream().map(t -> project + ":" + flow + ":" + t.getTaskName());
|
|
|
+ }).collect(Collectors.joining(","));
|
|
|
+ dingUtils.send("【" + (3 - i) + "】重新启动以下job:" + collect); // i was already decremented by the loop test, so (3 - i) is the attempt number 1..3
|
|
|
+ failureTask = run(bizDate, js);
|
|
|
+ }
|
|
|
+ if (!failureTask.isEmpty()) { // tasks still failing after the last attempt
|
|
|
+ System.exit(-1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @SneakyThrows
|
|
|
+ private Set<TaskInfo> run(String bizDate, List<DataWorksFlowJob> jobs) {
|
|
|
log.info("start!");
|
|
|
LocalDateTime start = LocalDateTime.now();
|
|
|
List<TaskInfo> collect = jobs.stream()
|
|
@@ -173,6 +230,7 @@ public class Main {
|
|
|
int totalTask = collect.size();
|
|
|
|
|
|
Set<TaskInfo> end = new HashSet<>();
|
|
|
+ Set<TaskInfo> failureTask = new HashSet<>();
|
|
|
TimedCache<TaskInfo, String> timedCache = CacheUtil.newTimedCache(300 * 1000);
|
|
|
int i = 0;
|
|
|
int successTask = 0;
|
|
@@ -207,8 +265,9 @@ public class Main {
|
|
|
if (failure.size() != 0) {
|
|
|
failedTask++;
|
|
|
log.error("failure node:{} ", failure);
|
|
|
- DingMsg error = new DingMsg("任务失败", taskInfo.getProject(), taskInfo.getFlow(), String.join(",", failure), TaskFlowEnum.FAILURE.getMsg());
|
|
|
+ DingMsg error = new DingMsg("任务失败", taskInfo.getProject(), taskInfo.getFlow(), taskInfo.getTaskName(), String.join(",", failure), TaskFlowEnum.FAILURE.getMsg());
|
|
|
dingUtils.send(error);
|
|
|
+ failureTask.add(taskInfo);
|
|
|
} else {
|
|
|
if (await.size() != 0) {
|
|
|
awaitTask++;
|
|
@@ -230,7 +289,7 @@ public class Main {
|
|
|
if (!timedCache.containsKey(taskInfo) && i <= 6) {
|
|
|
//超两小时
|
|
|
i++;
|
|
|
- DingMsg error = new DingMsg("【" + i + "】任务长时间未结束", taskInfo.getProject(), taskInfo.getFlow(), String.join(",", failure), TaskFlowEnum.RUNNING.getMsg());
|
|
|
+ DingMsg error = new DingMsg("【" + i + "】任务长时间未结束", taskInfo.getProject(), taskInfo.getFlow(), taskInfo.getTaskName(), String.join(",", failure), TaskFlowEnum.RUNNING.getMsg());
|
|
|
dingUtils.send(error);
|
|
|
timedCache.put(taskInfo, "1");
|
|
|
}
|
|
@@ -250,9 +309,7 @@ public class Main {
|
|
|
}
|
|
|
Thread.sleep(10000);
|
|
|
}
|
|
|
- if (failedTask != 0) {
|
|
|
- System.exit(-1);
|
|
|
- }
|
|
|
log.info("end");
|
|
|
+ return failureTask;
|
|
|
}
|
|
|
}
|