xufei 4 éve
szülő
commit
c9c04e03e8
1 módosított fájl, 24 hozzáadás és 11 törlés
  1. 24 11
      src/main/java/com/winhc/bigdata/udf/StringReplaceChar.java

+ 24 - 11
src/main/java/com/winhc/bigdata/udf/StringReplaceChar.java

@@ -2,8 +2,6 @@ package com.winhc.bigdata.udf;
 
 import com.aliyun.odps.udf.UDF;
 import com.aliyun.odps.utils.StringUtils;
-import jdk.nashorn.internal.runtime.regexp.joni.Regex;
-
 import java.util.regex.Pattern;
 
 /**
@@ -11,19 +9,34 @@ import java.util.regex.Pattern;
  * @Description: 统一分割符
  */
 public class StringReplaceChar extends UDF {
-
+    /** Matches one maximal run of delimiter characters. */
+    private static final Pattern DELIMITER_RUN = Pattern.compile("[^\\u4e00-\\u9fa5a-zA-Z \\(\\)().]+");
+
+    /**
+     * Splits {@code val} on delimiter runs and joins the pieces with ",".
+     * Characters U+4E00-U+9FA5, ASCII letters, spaces, parentheses and dots
+     * are kept; any other character is a delimiter. A leading run leaves a
+     * leading comma, while trailing runs are dropped.
+     *
+     * @param val raw text to normalize
+     * @return the joined text, or "" when val fails the isNotBlank check
+     *         or holds nothing but delimiter characters
+     */
     public String evaluate(String val) {
-        return StringUtils.isNotBlank(val) ? val.replaceAll("、",",")
-                .replaceAll(";",",")
-                .replaceAll(",",",")
-                .replaceAll(" ",",")
-                .replaceAll(":",",")
-                .replaceAll(":",",")
-                : "";
+        if (!StringUtils.isNotBlank(val)) {
+            return "";
+        }
+        // split() returns an empty array for delimiter-only input such as
+        // "123"; String.join maps that to "" where an index loop would throw.
+        return String.join(",", DELIMITER_RUN.split(val));
     }
 
     public static void main(String[] args) {
-        String res = new StringReplaceChar().evaluate("中国,人123;za 早上5:xx");
+        String res = new StringReplaceChar().evaluate("中国,人123;za 早上5:xx`ss@张思");
         System.out.println(res);
+        String res1 = new StringReplaceChar().evaluate("");
+        System.out.println(res1);
+        // Delimiter-only input: prints an empty line instead of throwing.
+        System.out.println(new StringReplaceChar().evaluate("123"));
     }
 }