|
@@ -0,0 +1,20 @@
|
|
|
|
+package com.winhc.bigdata.udf;
|
|
|
|
+
|
|
|
|
+import com.aliyun.odps.udf.UDF;
|
|
|
|
+import com.aliyun.odps.utils.StringUtils;
|
|
|
|
+
|
|
|
|
+import javax.xml.soap.Name;
|
|
|
|
+import java.util.regex.Pattern;
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * @Author: XuJiakai
|
|
|
|
+ * @Date: 2020/5/14 16:26
|
|
|
|
+ * @Description: 字符串去符号
|
|
|
|
+ */
|
|
|
|
+public class NameCleanup extends UDF {
|
|
|
|
+ private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z()() ·]");
|
|
|
|
+
|
|
|
|
+ public String evaluate(String val) {
|
|
|
|
+ return StringUtils.isNotBlank(val) ? pattern.matcher(val).replaceAll("").replaceAll(" +", " ") : "";
|
|
|
|
+ }
|
|
|
|
+}
|