|
@@ -0,0 +1,46 @@
|
|
|
+package com.winhc.bigdata.udf.etl;
|
|
|
+
|
|
|
+import com.aliyun.odps.udf.UDF;
|
|
|
+import com.aliyun.odps.utils.StringUtils;
|
|
|
+import com.winhc.bigdata.utils.CompanyUtils;
|
|
|
+
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.HashSet;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Set;
|
|
|
+import java.util.regex.Pattern;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+import static com.winhc.bigdata.utils.CompanyUtils.spiltNames;
|
|
|
+import static com.winhc.bigdata.utils.CompanyUtils.spiltNamesPlus;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @author: π
|
|
|
+ * 2022/3/7 11:34
|
|
|
+ * emails 邮箱(分隔符:\t;\t) 最新一年年报邮箱和已有邮箱并集,最新一年排第一个
|
|
|
+ * phones 电话(分隔符:\t;\t) 最新一年年报电话和已有电话并集,最新一年排第一个
|
|
|
+ */
|
|
|
+public class CompanyPhoneOrEmailMerge extends UDF {
|
|
|
+ public List<String> evaluate(String newContent, String oldContent) {
|
|
|
+ List<String> nc = new ArrayList<>(spiltNamesPlus(newContent));
|
|
|
+ Set<String> oc = new HashSet<>(spiltNamesPlus(oldContent));
|
|
|
+ oc.removeAll(nc);
|
|
|
+ nc.addAll(oc);
|
|
|
+ // String r = String.join("\t;\t", res);
|
|
|
+// return StringUtils.isBlank(r) ? null : r;
|
|
|
+ return nc.stream()
|
|
|
+ .filter(StringUtils::isNotBlank).collect(Collectors.toList());
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ public static void main(String[] args) {
|
|
|
+ String nc = "@@@18916678888中文哈哈哈\t;\t0527-83626688\t;\t";
|
|
|
+// nc = "";
|
|
|
+// nc = null;
|
|
|
+ String oc = "0527-83626688\t;\t13611541063天###@\t;\t18916678888\t;\t0527-83626688\t;\t0527-83626688\t;\t";
|
|
|
+// oc = "";
|
|
|
+// oc = null;
|
|
|
+ CompanyPhoneOrEmailMerge c = new CompanyPhoneOrEmailMerge();
|
|
|
+ System.out.println(c.evaluate(nc, oc));
|
|
|
+ }
|
|
|
+}
|