|
@@ -2,7 +2,6 @@ package com.winhc.bigdata.udf;
|
|
|
|
|
|
import com.aliyun.odps.udf.UDF;
|
|
|
import com.aliyun.odps.utils.StringUtils;
|
|
|
-
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
/**
|
|
@@ -10,18 +9,34 @@ import java.util.regex.Pattern;
|
|
|
* @Description: 统一分割符
|
|
|
*/
|
|
|
public class StringReplaceChar extends UDF {
|
|
|
- private static final Pattern pattern = Pattern.compile("[\\u4e00-\\u9fa50-9a-zA-Z]");
|
|
|
-
|
|
|
+ private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa5a-zA-Z \\(\\)().]+");
|
|
|
public String evaluate(String val) {
|
|
|
- return StringUtils.isNotBlank(val) ? val.replaceAll("、",",")
|
|
|
- .replaceAll(";",",")
|
|
|
- .replaceAll(",",",")
|
|
|
- .replaceAll(" ",",")
|
|
|
- : "";
|
|
|
+ if(StringUtils.isNotBlank(val)){
|
|
|
+
|
|
|
+ StringBuilder b = new StringBuilder();
|
|
|
+ String[] arr = pattern.split(val);
|
|
|
+ int iMax = arr.length - 1;
|
|
|
+ for(int i = 0; ; i++){
|
|
|
+ b.append(arr[i]);
|
|
|
+ if (i == iMax)
|
|
|
+ return b.toString();
|
|
|
+ b.append(",");
|
|
|
+ }
|
|
|
+ }else
|
|
|
+ return "";
|
|
|
+// return StringUtils.isNotBlank(val) ? val.replaceAll("、",",")
|
|
|
+// .replaceAll(";",",")
|
|
|
+// .replaceAll(",",",")
|
|
|
+// .replaceAll(" ",",")
|
|
|
+// .replaceAll(":",",")
|
|
|
+// .replaceAll(":",",")
|
|
|
+// : "";
|
|
|
}
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
- String res = new StringReplaceChar().evaluate("中国,人123;za 早上5");
|
|
|
+ String res = new StringReplaceChar().evaluate("中国,人123;za 早上5:xx`ss@张思");
|
|
|
System.out.println(res);
|
|
|
+ String res1 = new StringReplaceChar().evaluate("");
|
|
|
+ System.out.println(res1);
|
|
|
}
|
|
|
}
|