Jelajahi Sumber

Merge branch 'master' of http://139.224.213.4:3000/bigdata/UDF_Max

许家凯 4 tahun lalu
induk
melakukan
2b0136259b

File diff ditekan karena terlalu besar
+ 60 - 0
src/main/java/com/winhc/bigdata/udf/StringCaseNoExtract.java


+ 23 - 0
src/main/java/com/winhc/bigdata/udf/StringCleanChineseChar.java

@@ -0,0 +1,23 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Description: 去中文
+ */
+public class StringCleanChineseChar extends UDF {
+    private static final Pattern pattern = Pattern.compile("[\\u4e00-\\u9fa50-9a-zA-Z]");
+
+    public String evaluate(String val) {
+        return StringUtils.isNotBlank(val) ? pattern.matcher(val).replaceAll("") : "";
+    }
+
+    public static void main(String[] args) {
+        String res = new StringCleanChineseChar().evaluate("中国,人123,za.早上5");
+        System.out.println(res);
+    }
+}

+ 27 - 0
src/main/java/com/winhc/bigdata/udf/StringReplaceChar.java

@@ -0,0 +1,27 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Description: Unifies delimiters: the ideographic comma, semicolons, full-width commas
+ * and spaces (ASCII and full-width forms) are all rewritten to an ASCII comma.
+ */
+public class StringReplaceChar extends UDF {
+    // Matches 、 ; ； ， space and ideographic space; escapes keep the source encoding-independent.
+    private static final Pattern pattern = Pattern.compile("[\\u3001;\\uFF1B\\uFF0C \\u3000]");
+
+    /** Returns {@code val} with every delimiter replaced by ","; blank input yields "". */
+    public String evaluate(String val) {
+        return StringUtils.isNotBlank(val) ? pattern.matcher(val).replaceAll(",") : "";
+    }
+
+    /** Ad-hoc smoke run: prints the normalized form of a sample string. */
+    public static void main(String[] args) {
+        String res = new StringReplaceChar().evaluate("中国,人123;za 早上5");
+        System.out.println(res);
+    }
+}