|
@@ -0,0 +1,60 @@
|
|
|
+package com.winhc.bigdata.udf;
|
|
|
+
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.aliyun.odps.udf.UDF;
|
|
|
+import com.winhc.bigdata.utils.CompanyUtils;
|
|
|
+import org.apache.commons.lang.StringUtils;
|
|
|
+
|
|
|
+import java.util.Arrays;
|
|
|
+import java.util.HashSet;
|
|
|
+import java.util.Set;
|
|
|
+import java.util.regex.Pattern;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @Author: π
|
|
|
+ * @Description: 历史名称判断
|
|
|
+ * 包含 & 符号 或者 含有英文的大陆企业
|
|
|
+ */
|
|
|
+public class historyNamesJudge extends UDF {
|
|
|
+
|
|
|
+ public Boolean evaluate(String names, String companyType) {
|
|
|
+ if (StringUtils.isBlank(names)) return true;
|
|
|
+
|
|
|
+ Set<String> set = new HashSet<>();
|
|
|
+ if (StringUtils.isNotBlank(names)) {
|
|
|
+ set = Arrays.stream(names.split("\t;\t")).filter(StringUtils::isNotBlank).collect(Collectors.toSet());
|
|
|
+ }
|
|
|
+ Set<Boolean> s = set.stream()
|
|
|
+ .map(x -> this.compare2(x, companyType)).collect(Collectors.toSet());
|
|
|
+ return !s.contains(false);
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ private static final Pattern pattern2 = Pattern.compile(".*[a-zA-Z]+.*");
|
|
|
+
|
|
|
+ public Boolean compare2(String name, String companyType) {
|
|
|
+ if (com.aliyun.odps.utils.StringUtils.isBlank(name)) return false;
|
|
|
+ if (name.contains("&")) return false;
|
|
|
+ if (StringUtils.isBlank(companyType) || !companyType.equals("1")) return true;
|
|
|
+ return !pattern2.matcher(name).matches();
|
|
|
+ }
|
|
|
+
|
|
|
+ private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z()()]");
|
|
|
+
|
|
|
+ public Boolean compare(String name) {
|
|
|
+ if (com.aliyun.odps.utils.StringUtils.isBlank(name)) return false;
|
|
|
+ String clean = pattern.matcher(name).replaceAll("");
|
|
|
+ return clean.equals(name);
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void main(String[] args) {
|
|
|
+ System.out.println(new historyNamesJudge().evaluate("廣滙亞太有限公司111\t;\t(小米)", "2"));
|
|
|
+ System.out.println(new historyNamesJudge().evaluate("廣滙亞太有限公司111\t;\t123x", "1"));
|
|
|
+ System.out.println(new historyNamesJudge().evaluate("廣滙亞太有限公司111\t;\t细末X123", "1"));
|
|
|
+ System.out.println(new historyNamesJudge().evaluate("廣滙亞太111有限公司111\t;\t细末 ,.1111", "1"));
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+
|