Ver Fonte

add udf

xufei há 2 anos atrás
pai
commit
4276faf725

+ 48 - 0
src/main/java/com/winhc/bigdata/historyNamesTrans.java

@@ -0,0 +1,48 @@
+package com.winhc.bigdata;
+
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.utils.CompanyUtils;
+import org.apache.commons.lang.StringUtils;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * @Author: π
+ * @Description: ODPS UDF that converts a company's historical names into a JSON array of
+ * name objects produced by {@link CompanyUtils#getCompanyName(String)}.
+ */
+public class historyNamesTrans extends UDF {
+
+    /** Separator (used as a regex by {@link String#split(String)}) between names. */
+    private static final String NAME_SEPARATOR = "\t;\t";
+
+    /**
+     * Splits {@code names} on the tab-semicolon-tab separator, trims each entry, drops blank
+     * entries and entries equal to {@code cname}, converts the remainder with
+     * {@link CompanyUtils#getCompanyName(String)} and serializes the de-duplicated result.
+     *
+     * @param cname name to exclude from the result; may be {@code null}, in which case
+     *              nothing is excluded
+     * @param names separator-joined list of names; may be {@code null} or blank
+     * @return JSON array string of the converted names, or {@code null} when {@code names}
+     *         is blank or no entry remains after filtering
+     */
+    public String evaluate(String cname, String names) {
+        if (StringUtils.isBlank(names)) {
+            return null;
+        }
+        Set<JSONObject> converted = Arrays.stream(names.split(NAME_SEPARATOR))
+                // Trim BEFORE the blank check: an entry that only becomes empty after
+                // trimming would otherwise reach getCompanyName, come back as null and
+                // end up serialized as "[null]".
+                .map(StringUtils::trim)
+                .filter(StringUtils::isNotBlank)
+                .filter(x -> !x.equals(cname))
+                .map(CompanyUtils::getCompanyName)
+                .collect(Collectors.toSet());
+        return converted.isEmpty() ? null : JSONObject.toJSONString(converted);
+    }
+
+    /** Manual smoke test that prints the result of a few sample calls. */
+    public static void main(String[] args) {
+        System.out.println(new historyNamesTrans().evaluate("廣滙亞太有限公司","廣滙亞太有限公司111"));
+        System.out.println(new historyNamesTrans().evaluate(null,"廣滙亞太有限公司111"));
+        System.out.println(new historyNamesTrans().evaluate(null,""));
+    }
+
+}
+
+

+ 24 - 0
src/main/java/com/winhc/bigdata/udf/cnameTrans.java

@@ -0,0 +1,24 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.utils.CompanyUtils;
+
+/**
+ * @Author: π
+ * @Description: 公司名转换
+ */
+public class cnameTrans extends UDF {
+
+    public String evaluate(String name) {
+        return JSONObject.toJSONString(CompanyUtils.getCompanyName(name));
+    }
+
+    public static void main(String[] args) {
+        System.out.println(new cnameTrans().evaluate("廣滙亞太有限公司"));
+        System.out.println(new cnameTrans().evaluate(null));
+    }
+
+}
+
+

+ 31 - 0
src/main/java/com/winhc/bigdata/utils/CompanyUtils.java

@@ -1,12 +1,17 @@
 package com.winhc.bigdata.utils;
 
+import com.alibaba.fastjson.JSONObject;
 import com.aliyun.odps.utils.StringUtils;
+import com.github.houbb.opencc4j.util.ZhConverterUtil;
 
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
+import static com.github.stuxuhai.jpinyin.ChineseHelper.convertToSimplifiedChinese;
+
 /**
  * @author: XuJiakai
  * 2021/12/6 11:27
@@ -30,4 +35,30 @@ public class CompanyUtils {
             return val;
         }
     }
+    /** Matches every character that is not in U+4E00..U+9FA5, an ASCII digit or an ASCII letter. */
+    private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z]");
+
+    /**
+     * Removes every character matched by {@link #pattern} from the input.
+     *
+     * @param s text to clean; may be {@code null}
+     * @return the cleaned text, or the empty string when {@code s} is blank
+     */
+    public static String cleanup(String s) {
+        return StringUtils.isBlank(s) ? "" : pattern.matcher(s).replaceAll("");
+    }
+
+    public static JSONObject getCompanyName(String name) {
+        if (StringUtils.isEmpty(name)) return null;
+        else {
+            String show = name.replaceAll("\t;", "").trim();
+            String value = cleanup(name);
+            String simplifiedChineseValue = ZhConverterUtil.toSimple(value);
+            String simplifiedChinese = ZhConverterUtil.toSimple(show);
+            JSONObject j = new JSONObject();
+            if (show.equals(simplifiedChinese)) {
+                j.put("show", show);
+                j.put("value", value);
+            } else {
+                j.put("show", show);
+                j.put("value", simplifiedChineseValue);
+                j.put("simplified_chinese", simplifiedChinese);
+            }
+            return j;
+        }
+    }
 }