|
@@ -0,0 +1,49 @@
|
|
|
+package com.winhc.bigdata.udf;
|
|
|
+
|
|
|
+import com.aliyun.odps.udf.UDF;
|
|
|
+import com.aliyun.odps.utils.StringUtils;
|
|
|
+
|
|
|
+import java.util.Arrays;
|
|
|
+import java.util.List;
|
|
|
+import java.util.regex.Pattern;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @author: XuJiakai
|
|
|
+ * 2021/1/27 15:36
|
|
|
+ */
|
|
|
+@SuppressWarnings("all")
|
|
|
+public class RegStatusStdTrim2 extends UDF {
|
|
|
+ private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa5]");
|
|
|
+ private static final List<String> ZY = Arrays.asList("在业","在营","经营","有效","在册","存续","开业","正常","登记","迁入");
|
|
|
+
|
|
|
+ private static final List<String> TY = Arrays.asList("停业","停止经营","责令关闭","停工");
|
|
|
+ public String evaluate(String reg_status) {
|
|
|
+ return get_std(reg_status);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String get_std(String str) {
|
|
|
+ if (StringUtils.isEmpty(str)) {
|
|
|
+ return "其他";
|
|
|
+ }
|
|
|
+ String reg_status_clear = pattern.matcher(str).replaceAll("");
|
|
|
+ if (StringUtils.isEmpty(reg_status_clear)) {
|
|
|
+ return "其他";
|
|
|
+ }
|
|
|
+ if(ZY.stream().anyMatch(s->reg_status_clear.contains(s))){
|
|
|
+ return "存续";
|
|
|
+ } else if (reg_status_clear.contains("吊销")) {
|
|
|
+ return "吊销";
|
|
|
+ } else if (reg_status_clear.contains("注销")) {
|
|
|
+ return "注销";
|
|
|
+ } else if (reg_status_clear.contains("清算")) {
|
|
|
+ return "清算";
|
|
|
+ } else if (reg_status_clear.contains("迁他") || reg_status_clear.contains("迁出")) {
|
|
|
+ return "迁出";
|
|
|
+ } else if (TY.stream().anyMatch(s->reg_status_clear.contains(s))) {
|
|
|
+ return "停业";
|
|
|
+ } else if (reg_status_clear.contains("撤销")) {
|
|
|
+ return "撤销";
|
|
|
+ }
|
|
|
+ return "其他";
|
|
|
+ }
|
|
|
+}
|