|
@@ -0,0 +1,59 @@
|
|
|
+package com.winhc.bigdata.udf;
|
|
|
+
|
|
|
+import com.alibaba.fastjson.JSON;
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.aliyun.odps.udf.UDF;
|
|
|
+import com.winhc.bigdata.utils.CompanyUtils;
|
|
|
+import org.apache.commons.collections.CollectionUtils;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+
|
|
|
+import java.util.*;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @Author: π
|
|
|
+ * @Description: 历史名称包含现有名称
|
|
|
+ */
|
|
|
+public class name_contains2 extends UDF {
|
|
|
+
|
|
|
+
|
|
|
+ public List<String> yhcNames(String names) {
|
|
|
+ Set<String> set = new HashSet<>();
|
|
|
+
|
|
|
+ if (org.apache.commons.lang3.StringUtils.isNotBlank(names)) {
|
|
|
+ set.addAll(Arrays.stream(names.split("\t;\t"))
|
|
|
+ .filter(org.apache.commons.lang3.StringUtils::isNotBlank).collect(Collectors.toSet()));
|
|
|
+ }
|
|
|
+ return set.stream()
|
|
|
+ .filter(org.apache.commons.lang3.StringUtils::isNotBlank)
|
|
|
+ .map(x -> x.replaceAll("\t;", "")
|
|
|
+ .replaceAll("[(]", "(")
|
|
|
+ .replaceAll("[)]", ")")
|
|
|
+ .trim())
|
|
|
+ .map(CompanyUtils::cleanup)
|
|
|
+ .filter(org.apache.commons.lang3.StringUtils::isNotBlank)
|
|
|
+ .distinct()
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ }
|
|
|
+
|
|
|
+ public Boolean evaluate(String name, String history_names) {
|
|
|
+ if (StringUtils.isBlank(name) || StringUtils.isBlank(history_names)) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ List<String> hisNames = yhcNames(history_names);
|
|
|
+ if (hisNames.size() == 0) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ return hisNames.contains(name);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ public static void main(String[] args) {
|
|
|
+
|
|
|
+ name_contains2 n = new name_contains2();
|
|
|
+ System.out.println(n.evaluate("11", "22\t;\t112"));
|
|
|
+ System.out.println(n.evaluate("11", "22\t;\t11"));
|
|
|
+ System.out.println(n.evaluate("11", "11,"));
|
|
|
+
|
|
|
+ }
|
|
|
+}
|