|
@@ -2,41 +2,41 @@ package com.winhc.bigdata.udf;
|
|
|
|
|
|
import com.aliyun.odps.udf.UDF;
|
|
import com.aliyun.odps.udf.UDF;
|
|
import com.aliyun.odps.utils.StringUtils;
|
|
import com.aliyun.odps.utils.StringUtils;
|
|
|
|
+
|
|
|
|
+import java.util.Arrays;
|
|
|
|
+import java.util.List;
|
|
import java.util.regex.Pattern;
|
|
import java.util.regex.Pattern;
|
|
|
|
+import java.util.stream.Collectors;
|
|
|
|
|
|
/**
|
|
/**
|
|
* @Author: π
|
|
* @Author: π
|
|
* @Description: 统一分割符
|
|
* @Description: 统一分割符
|
|
*/
|
|
*/
|
|
public class StringReplaceChar extends UDF {
|
|
public class StringReplaceChar extends UDF {
|
|
- private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa5a-zA-Z \\(\\)().]+");
|
|
|
|
|
|
+ private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa5a-zA-Z\\(\\)()]+");
|
|
|
|
+
|
|
public String evaluate(String val) {
|
|
public String evaluate(String val) {
|
|
- if(StringUtils.isNotBlank(val)){
|
|
|
|
|
|
+ if (StringUtils.isNotBlank(val)) {
|
|
|
|
|
|
- StringBuilder b = new StringBuilder();
|
|
|
|
- String[] arr = pattern.split(val);
|
|
|
|
- int iMax = arr.length - 1;
|
|
|
|
- for(int i = 0; ; i++){
|
|
|
|
- b.append(arr[i]);
|
|
|
|
- if (i == iMax)
|
|
|
|
- return b.toString();
|
|
|
|
- b.append(",");
|
|
|
|
- }
|
|
|
|
- }else
|
|
|
|
|
|
+ String arr = val.replaceAll("、", ",")
|
|
|
|
+ .replaceAll(";", ",")
|
|
|
|
+ .replaceAll(",", ",")
|
|
|
|
+ .replaceAll(" ", ",")
|
|
|
|
+ .replaceAll("。", ",")
|
|
|
|
+ .replaceAll(";", ",")
|
|
|
|
+ .replaceAll(":", ",")
|
|
|
|
+ .replaceAll("\\s+", ",");
|
|
|
|
+ List<String> list = Arrays.asList(arr.split(",")).stream().filter(m -> m.length() > 1).collect(Collectors.toList());
|
|
|
|
+ return String.join(",", list);
|
|
|
|
+ }else{
|
|
return "";
|
|
return "";
|
|
-// return StringUtils.isNotBlank(val) ? val.replaceAll("、",",")
|
|
|
|
-// .replaceAll(";",",")
|
|
|
|
-// .replaceAll(",",",")
|
|
|
|
-// .replaceAll(" ",",")
|
|
|
|
-// .replaceAll(":",",")
|
|
|
|
-// .replaceAll(":",",")
|
|
|
|
-// : "";
|
|
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
public static void main(String[] args) {
|
|
public static void main(String[] args) {
|
|
- String res = new StringReplaceChar().evaluate("中国,人123;za 早上5:xx`ss@张思");
|
|
|
|
|
|
+ String res = new StringReplaceChar().evaluate("2015)深南法蛇民初第883-887受理郑委,曹 连云,庄忠杰,曹元洪,曹硕");
|
|
System.out.println(res);
|
|
System.out.println(res);
|
|
- String res1 = new StringReplaceChar().evaluate("");
|
|
|
|
|
|
+ String res1 = new StringReplaceChar().evaluate("宜城兴荣民族塑料制品有限公司、屈万英(公民身份号码:**************504X)、何秋实(公民身份号码:**************0055)、曾娟娟(公民身份号码:**************0040)、王琴(公民身份号码:**************5523)、丁国红(公民身份号码:**************2016)");
|
|
System.out.println(res1);
|
|
System.out.println(res1);
|
|
}
|
|
}
|
|
}
|
|
}
|