Prechádzať zdrojové kódy

feat: 添加案号规整函数

许家凯 4 rokov pred
rodič
commit
a87244ee81

+ 47 - 0
src/main/java/com/winhc/bigdata/udf/CaseNoTrim.java

@@ -0,0 +1,47 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * ODPS UDF that normalises a case number (案号) embedded in free text.
+ *
+ * <p>Processing steps, in order:
+ * <ol>
+ *   <li>full-width characters are converted to their half-width ASCII forms;</li>
+ *   <li>ASCII spaces are removed;</li>
+ *   <li>every {@code dddd年} is rewritten to {@code (dddd)};</li>
+ *   <li>the segment starting at a {@code (dddd)} year marker and running up to and
+ *       including the next {@code 号} (or to the end of the text when no {@code 号}
+ *       follows) is extracted.</li>
+ * </ol>
+ *
+ * <p>Example: {@code （2020）京01民终123号} and {@code 2020年京01民终123号} both yield
+ * {@code (2020)京01民终123号}.
+ *
+ * @Author: XuJiakai
+ * @Date: 2020/9/30 08:55
+ */
+public class CaseNoTrim extends UDF {
+
+    /**
+     * Matches the whole input and captures, in group 1, a parenthesised four-digit
+     * year followed by everything up to and including the next {@code 号}. The
+     * leading {@code .*} is greedy, so when several year markers are present the
+     * last one is captured.
+     */
+    private static final Pattern CASE_NO_PATTERN = Pattern.compile(".*([(\\(]\\d{4}[)\\)][^号]*号?).*");
+
+    /** Matches a four-digit year written with the {@code 年} suffix, e.g. {@code 2020年}. */
+    private static final Pattern YEAR_PATTERN = Pattern.compile("(\\d{4}?)年");
+
+    /** Inputs shorter than this (after space removal) are rejected. */
+    private static final int MIN_LENGTH = 8;
+
+    /** Ideographic (full-width) space, U+3000. */
+    private static final char IDEOGRAPHIC_SPACE = 12288;
+    /** First full-width form that has an ASCII counterpart, U+FF01. */
+    private static final char FULL_WIDTH_FIRST = 65281;
+    /** Last full-width form that has an ASCII counterpart, U+FF5E. */
+    private static final char FULL_WIDTH_LAST = 65374;
+    /** Distance between a full-width form and its ASCII counterpart (0xFEE0). */
+    private static final int FULL_WIDTH_OFFSET = 65248;
+
+    /**
+     * Extracts the normalised case number from {@code val}.
+     *
+     * @param val raw text that may contain a case number; may be {@code null}
+     * @return the normalised case number, or {@code null} when the input is blank,
+     *         is shorter than 8 characters once spaces are removed, or contains no
+     *         recognisable year marker
+     */
+    public String evaluate(String val) {
+        if (StringUtils.isBlank(val)) {
+            return null;
+        }
+
+        String compact = toDBC(val).replace(" ", "");
+        if (compact.length() < MIN_LENGTH) {
+            return null;
+        }
+
+        // Unify the "2020年" spelling with the "(2020)" spelling before extraction.
+        String unified = YEAR_PATTERN.matcher(compact).replaceAll("\\($1\\)");
+
+        // matches() already anchors the pattern to the whole input, so the capture
+        // group can be read directly instead of re-scanning with replaceAll("$1").
+        Matcher matcher = CASE_NO_PATTERN.matcher(unified);
+        return matcher.matches() ? matcher.group(1) : null;
+    }
+
+    /**
+     * Converts full-width characters to half-width: the ideographic space becomes an
+     * ASCII space and U+FF01..U+FF5E are shifted down to {@code '!'}..{@code '~'}.
+     * All other characters are left unchanged.
+     *
+     * @param input text to convert; must not be {@code null}
+     * @return a new string with full-width forms replaced
+     */
+    private static String toDBC(String input) {
+        char[] chars = input.toCharArray();
+        for (int i = 0; i < chars.length; i++) {
+            char ch = chars[i];
+            if (ch == IDEOGRAPHIC_SPACE) {
+                chars[i] = ' ';
+            } else if (ch >= FULL_WIDTH_FIRST && ch <= FULL_WIDTH_LAST) {
+                chars[i] = (char) (ch - FULL_WIDTH_OFFSET);
+            }
+        }
+        return new String(chars);
+    }
+}