|
@@ -8,11 +8,14 @@ import com.helospark.lightdi.LightDiContext;
|
|
|
import com.helospark.lightdi.annotation.Autowired;
|
|
|
import com.helospark.lightdi.annotation.Service;
|
|
|
import com.winhc.dataworks.flow.touch.bean.*;
|
|
|
+import com.winhc.dataworks.flow.touch.configuration.DataWorksAccessProperties;
|
|
|
import com.winhc.dataworks.flow.touch.configuration.SchemaInit;
|
|
|
import com.winhc.dataworks.flow.touch.service.OdpsService;
|
|
|
import com.winhc.dataworks.flow.touch.service.TouchService;
|
|
|
import com.winhc.dataworks.flow.touch.utils.DateUtils;
|
|
|
import com.winhc.dataworks.flow.touch.utils.DingUtils;
|
|
|
+import com.winhc.dataworks.flow.touch.utils.SparkDaemonKill;
|
|
|
+import com.winhc.dataworks.flow.touch.utils.SparkDaemonUtils;
|
|
|
import lombok.SneakyThrows;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.commons.cli.*;
|
|
@@ -33,6 +36,10 @@ import java.util.stream.Collectors;
|
|
|
public class Main {
|
|
|
@Autowired
|
|
|
private TouchService touchService;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private DataWorksAccessProperties dataWorksAccessProperties;
|
|
|
+
|
|
|
private static final Options options = new Options();
|
|
|
|
|
|
|
|
@@ -51,6 +58,8 @@ public class Main {
|
|
|
|
|
|
options.addOption("flow", "flow", true, "单任务必填,业务流程名");
|
|
|
options.addOption("task", "taskName", true, "单任务必填,任务名");
|
|
|
+ options.addOption("odps", "odps_home", true, "odps cmd 根目录");
|
|
|
+
|
|
|
}
|
|
|
|
|
|
private static void verify(CommandLine commandLine) {
|
|
@@ -58,6 +67,9 @@ public class Main {
|
|
|
if (!commandLine.hasOption("f")) {
|
|
|
throw new RuntimeException();
|
|
|
}
|
|
|
+ if (!commandLine.hasOption("odps")) {
|
|
|
+ throw new RuntimeException();
|
|
|
+ }
|
|
|
if (commandLine.hasOption("s")) {
|
|
|
if (!commandLine.hasOption("flow") || !commandLine.hasOption("task")) {
|
|
|
throw new RuntimeException();
|
|
@@ -127,8 +139,9 @@ public class Main {
|
|
|
if (commandLine.hasOption("d")) {
|
|
|
log.info("debug模式:{}", jobs);
|
|
|
} else {
|
|
|
+ String odpsCmdPath = commandLine.getOptionValue("odps");
|
|
|
dd.send(msg);
|
|
|
- bean.start(bizDate, jobs);
|
|
|
+ bean.start(bizDate, jobs, odpsCmdPath);
|
|
|
}
|
|
|
}
|
|
|
} catch (ParseException e) {
|
|
@@ -146,7 +159,32 @@ public class Main {
|
|
|
}
|
|
|
|
|
|
@SneakyThrows
|
|
|
- private void start(String bizDate, List<DataWorksFlowJob> jobs) {
|
|
|
+ private void start(String bizDate, List<DataWorksFlowJob> jobs, String odpsCmdHome) {
|
|
|
+ //Start a SparkDaemonKill daemon for every distinct project space referenced by the jobs
|
|
|
+ String accessKeyId = dataWorksAccessProperties.getAccessKeyId();
|
|
|
+ String accessKeySecret = dataWorksAccessProperties.getAccessKeySecret();
|
|
|
+ Set<String> ps = jobs.stream().map(DataWorksFlowJob::getProject).collect(Collectors.toSet());
|
|
|
+ for (String p : ps) {
|
|
|
+ /*SparkDaemonThread th = new SparkDaemonThread(p, accessKeyId, accessKeySecret, odpsCmdHome, 90L);
|
|
|
+ th.start();*/
|
|
|
+ new SparkDaemonKill(p, accessKeyId, accessKeySecret, odpsCmdHome, SparkDaemonUtils.getQueue(p)).start();
|
|
|
+ }
|
|
|
+
|
|
|
+ //Run the jobs and collect the failed tasks; re-run only the failed ones (at most three retries) and exit with status -1 if any still fail
|
|
|
+ Set<TaskInfo> failureTask = run(bizDate, jobs);
|
|
|
+ int i = 3;
|
|
|
+ while (!failureTask.isEmpty() && i-- > 0) {
|
|
|
+ Set<String> fSet = failureTask.stream().map(TaskInfo::getKey).collect(Collectors.toSet());
|
|
|
+ List<DataWorksFlowJob> js = jobs.stream().filter(job -> fSet.contains(job.getProject() + ":" + job.getFlow() + ":" + job.getTask())).collect(Collectors.toList());
|
|
|
+ failureTask = run(bizDate, js);
|
|
|
+ }
|
|
|
+ if (!failureTask.isEmpty()) {
|
|
|
+ System.exit(-1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @SneakyThrows
|
|
|
+ private Set<TaskInfo> run(String bizDate, List<DataWorksFlowJob> jobs) {
|
|
|
log.info("start!");
|
|
|
LocalDateTime start = LocalDateTime.now();
|
|
|
List<TaskInfo> collect = jobs.stream()
|
|
@@ -173,6 +211,7 @@ public class Main {
|
|
|
int totalTask = collect.size();
|
|
|
|
|
|
Set<TaskInfo> end = new HashSet<>();
|
|
|
+ Set<TaskInfo> failureTask = new HashSet<>();
|
|
|
TimedCache<TaskInfo, String> timedCache = CacheUtil.newTimedCache(300 * 1000);
|
|
|
int i = 0;
|
|
|
int successTask = 0;
|
|
@@ -209,6 +248,7 @@ public class Main {
|
|
|
log.error("failure node:{} ", failure);
|
|
|
DingMsg error = new DingMsg("任务失败", taskInfo.getProject(), taskInfo.getFlow(), String.join(",", failure), TaskFlowEnum.FAILURE.getMsg());
|
|
|
dingUtils.send(error);
|
|
|
+ failureTask.add(taskInfo);
|
|
|
} else {
|
|
|
if (await.size() != 0) {
|
|
|
awaitTask++;
|
|
@@ -250,9 +290,7 @@ public class Main {
|
|
|
}
|
|
|
Thread.sleep(10000);
|
|
|
}
|
|
|
- if (failedTask != 0) {
|
|
|
- System.exit(-1);
|
|
|
- }
|
|
|
log.info("end");
|
|
|
+ return failureTask;
|
|
|
}
|
|
|
}
|