|
@@ -0,0 +1,43 @@
|
|
|
+package com.winhc.bigdata.udf.string;
|
|
|
+
|
|
|
+import com.aliyun.odps.udf.UDF;
|
|
|
+import com.aliyun.odps.utils.StringUtils;
|
|
|
+
|
|
|
+import java.util.regex.Pattern;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @author: XuJiakai
|
|
|
+ * 2021/8/23 15:17
|
|
|
+ */
|
|
|
+public class EfficaciousStr extends UDF {
|
|
|
+ private static final Pattern pattern = Pattern.compile("^[\\u4e00-\\u9fa50-9a-zA-Z ]+$");
|
|
|
+ private static final Pattern num_pat = Pattern.compile("^[0-9 ]+$");
|
|
|
+ private static final Pattern person_pat = Pattern.compile("^[\\u4e00-\\u9fa5a-zA-Z ]+$");
|
|
|
+ private static final Pattern eng_pat = Pattern.compile("^[a-zA-Z ]+$");
|
|
|
+
|
|
|
+ public static final String NAME = "公司名";
|
|
|
+ public static final String PERSON = "人名";
|
|
|
+ public static final String APP = "品牌";
|
|
|
+
|
|
|
+ public Boolean evaluate(String label, String str) {
|
|
|
+ if (StringUtils.isEmpty(str)) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ if ((NAME.equals(label) || PERSON.equals(label)) && num_pat.matcher(str).matches()) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ switch (label) {
|
|
|
+ case NAME:
|
|
|
+ return person_pat.matcher(str).matches();
|
|
|
+ case PERSON:
|
|
|
+ if(eng_pat.matcher(str).matches()){
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ return person_pat.matcher(str).matches();
|
|
|
+ case APP:
|
|
|
+ return pattern.matcher(str).matches();
|
|
|
+ default:
|
|
|
+ throw new RuntimeException("label is not define," + label);
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|