xufei 2 anni fa
parent
commit
272d573d49

+ 40 - 0
src/main/java/com/winhc/bigdata/udf/etl/CompanyPhoneAndOther.java

@@ -0,0 +1,40 @@
+package com.winhc.bigdata.udf.etl;
+
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.utils.CompanyUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * ODPS UDF that separates a phone list into one primary number and the remaining numbers.
+ *
+ * @author: π
+ * 2022/3/7 11:34
+ */
+public class CompanyPhoneAndOther extends UDF {
+    /**
+     * Splits {@code content} with {@code CompanyUtils.spiltNames} and selects a primary entry:
+     * the first entry that is exactly 11 characters long (presumably a mobile number — confirm),
+     * or the first entry overall when no such entry exists.
+     *
+     * @param content raw phone text; passed unchanged to {@code CompanyUtils.spiltNames}
+     * @return a two-element list {@code [primary, others]} where {@code others} is the remaining
+     *         entries joined with {@code ";"} (empty string when there are none), or
+     *         {@code null} when no entries were parsed
+     */
+    public List<String> evaluate(String content) {
+        List<String> parsed = CompanyUtils.spiltNames(content);
+        // spiltNames is not visible here; guard against a null result as well as an empty one.
+        if (parsed == null || parsed.isEmpty()) {
+            return null;
+        }
+        // Work on a private copy: remove() below must not mutate (or fail on) the helper's list.
+        List<String> phones = new ArrayList<>(parsed);
+        String firstPhone = phones.stream()
+                .filter(x -> x.length() == 11)
+                .findFirst()
+                .orElse(phones.get(0));
+        // Removes only the first occurrence, so duplicates of the primary stay in "others".
+        phones.remove(firstPhone);
+
+        List<String> reList = new ArrayList<>(2);
+        reList.add(firstPhone);
+        reList.add(String.join(";", phones));
+        return reList;
+    }
+
+    /** Manual smoke test: prints the result for a sample input. */
+    public static void main(String[] args) {
+        //String con = "13611541063\t;\t18916678888\t;\t0527-83626688\t;\t";
+        //String con = "0527-83626688\t;\t13855951234";
+        String con = "";
+        CompanyPhoneAndOther c = new CompanyPhoneAndOther();
+        System.out.println(c.evaluate(con));
+    }
+}

+ 60 - 0
src/main/java/com/winhc/bigdata/udf/historyNamesJudge.java

@@ -0,0 +1,60 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.utils.CompanyUtils;
+import org.apache.commons.lang.StringUtils;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * ODPS UDF that judges whether a company's historical names are all acceptable.
+ * A name is rejected when it contains the {@code &} character, or when it contains Latin
+ * letters and the company type is {@code "1"} (apparently the mainland-enterprise type,
+ * per the original description — confirm against the caller).
+ *
+ * @Author: π
+ * @Description: historical-name check
+ */
+public class historyNamesJudge extends UDF {
+
+    /** Matches any single ASCII Latin letter; used with {@code find()} so newlines cannot hide a match. */
+    private static final Pattern pattern2 = Pattern.compile("[a-zA-Z]");
+
+    /** Matches any character outside the allowed set checked by {@link #compare(String)}. */
+    private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z()()]");
+
+    /**
+     * Checks every non-blank name in a {@code "\t;\t"}-separated list with
+     * {@link #compare2(String, String)}.
+     *
+     * @param names       historical names separated by {@code "\t;\t"}; may be null or blank
+     * @param companyType company type code forwarded to {@code compare2}
+     * @return {@code true} when {@code names} is blank or every non-blank name passes,
+     *         {@code false} as soon as one name fails
+     */
+    public Boolean evaluate(String names, String companyType) {
+        // No history at all is treated as valid.
+        if (StringUtils.isBlank(names)) {
+            return true;
+        }
+        return Arrays.stream(names.split("\t;\t"))
+                .filter(StringUtils::isNotBlank)
+                .allMatch(name -> compare2(name, companyType));
+    }
+
+    /**
+     * Validates a single name.
+     *
+     * @param name        the name to check
+     * @param companyType company type code; the Latin-letter rule applies only when it is {@code "1"}
+     * @return {@code false} for a blank name, a name containing {@code &}, or (for type
+     *         {@code "1"}) a name containing any ASCII Latin letter; {@code true} otherwise
+     */
+    public Boolean compare2(String name, String companyType) {
+        if (com.aliyun.odps.utils.StringUtils.isBlank(name)) {
+            return false;
+        }
+        if (name.contains("&")) {
+            return false;
+        }
+        // Null, blank, or any type other than "1": no Latin-letter restriction.
+        if (!"1".equals(companyType)) {
+            return true;
+        }
+        return !pattern2.matcher(name).find();
+    }
+
+    /**
+     * Checks that a name uses only allowed characters: CJK ideographs U+4E00..U+9FA5,
+     * ASCII digits, ASCII letters, and parentheses. Not called by the other methods of this class.
+     *
+     * @param name the name to check
+     * @return {@code false} for a blank name or one containing any other character
+     */
+    public Boolean compare(String name) {
+        if (com.aliyun.odps.utils.StringUtils.isBlank(name)) {
+            return false;
+        }
+        return !pattern.matcher(name).find();
+    }
+
+    /** Manual smoke test: prints the verdict for a few sample inputs. */
+    public static void main(String[] args) {
+        System.out.println(new historyNamesJudge().evaluate("廣滙亞太有限公司111\t;\t(小米)", "2"));
+        System.out.println(new historyNamesJudge().evaluate("廣滙亞太有限公司111\t;\t123x", "1"));
+        System.out.println(new historyNamesJudge().evaluate("廣滙亞太有限公司111\t;\t细末X123", "1"));
+        System.out.println(new historyNamesJudge().evaluate("廣滙亞太111有限公司111\t;\t细末  ,.1111", "1"));
+    }
+
+}
+
+