Sfoglia il codice sorgente

feat: 添加一系列函数

许家凯 4 anni fa
parent
commit
bf41832bfd

+ 1 - 1
src/main/java/com/winhc/bigdata/udf/CaseNoTrim.java

@@ -28,7 +28,7 @@ public class CaseNoTrim extends UDF {
         }
         val = year_pat.matcher(val).replaceAll("\\($1\\)");
         val = val.replace("(", "(")
-                .replace(")", ")");
+                .replace(")", ")").replaceAll("O","0");
 
         Matcher matcher = pattern.matcher(val);
         return matcher.matches() ? matcher.replaceAll("$1") : null;

+ 56 - 0
src/main/java/com/winhc/bigdata/udf/CompanyOrgTypeTrim.java

@@ -0,0 +1,56 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * @author: XuJiakai
+ * 2021/1/27 11:38
+ */
+@SuppressWarnings("all")
+public class CompanyOrgTypeTrim extends UDF {
+    private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa5()()]");
+
+    public String evaluate(String company_name, String company_org_type) {
+        if (company_name.contains("有限责任公司")) {
+            return "有限责任公司";
+        } else if (company_name.contains("股份")) {
+            return "股份有限公司";
+        } else if (company_name.contains("有限公司")) {
+            return "有限责任公司";
+        } else {
+            if (StringUtils.isEmpty(company_org_type)) {
+                return null;
+            }
+            company_org_type = pattern.matcher(company_org_type).replaceAll("");
+            if (StringUtils.isEmpty(company_org_type)) {
+                return null;
+            }
+            if (company_org_type.contains("有限责任公司") || company_org_type.contains("一人有限责任公司") || company_org_type.contains("一人有限责任")) {
+                return "有限责任公司";
+            } else if (company_org_type.contains("股份")) {
+                return "股份有限公司";
+            } else if (company_org_type.contains("个人独资企业")) {
+                return "独资企业";
+            } else if (company_org_type.contains("个人经营") || company_org_type.contains("个体")) {
+                return "个体工商户";
+            } else if (company_org_type.contains("专业合作") || company_org_type.contains("集体")) {
+                return "集体所有制";
+            } else if (company_org_type.contains("有限") && company_org_type.contains("合伙")) {
+                return "有限合伙";
+            } else if (company_org_type.contains("合伙")) {
+                return "普通合伙";
+            } else if (company_org_type.contains("国有") || company_org_type.contains("全民所有制") || company_org_type.contains("国资")) {
+                return "国企";
+            } else if (company_org_type.contains("外商") || company_org_type.contains("中外") || company_org_type.contains("外国") || company_org_type.contains("外资")) {
+                return "外商投资企业";
+            } else if (company_org_type.contains("联营")) {
+                return "联营企业";
+            } else {
+                return null;
+            }
+        }
+    }
+}

+ 25 - 0
src/main/java/com/winhc/bigdata/udf/IsJsonStr.java

@@ -0,0 +1,25 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+
+/**
+ * MaxCompute (ODPS) UDF that reports whether a string can be parsed by Gson's {@link JsonParser}.
+ *
+ * @author: XuJiakai
+ * 2021/2/24 15:02
+ */
+public class IsJsonStr extends UDF {
+    /**
+     * Checks whether {@code val} parses as JSON.
+     *
+     * <p>NOTE(review): {@code JsonParser} parses leniently, so bare scalars and unquoted words are
+     * presumably still accepted; tighten to objects/arrays only if callers expect that.
+     *
+     * @param val the candidate text; may be {@code null}
+     * @return {@code false} for {@code null}, empty or whitespace-only input and for text the
+     *         parser rejects; {@code true} otherwise
+     */
+    public Boolean evaluate(String val) {
+        // An empty document is not JSON, yet the parser answers it with a non-null JsonNull
+        // element instead of failing, so it has to be rejected up front.
+        if (val == null || val.trim().isEmpty()) {
+            return false;
+        }
+        try {
+            JsonElement parsed = new JsonParser().parse(val);
+            return parsed != null;
+        } catch (Exception e) {
+            // Deliberately broad: any parser failure simply means "not JSON" for this UDF.
+            return false;
+        }
+    }
+}

+ 59 - 0
src/main/java/com/winhc/bigdata/udf/RegStatusStdTrim.java

@@ -0,0 +1,59 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * MaxCompute (ODPS) UDF that normalizes a company's registration status to a fixed label.
+ *
+ * @author: XuJiakai
+ * 2021/1/27 15:36
+ */
+@SuppressWarnings("all")
+public class RegStatusStdTrim extends UDF {
+    /** Matches every character outside the range U+4E00..U+9FA5. */
+    private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa5]");
+
+    /**
+     * Returns the normalized registration status.
+     *
+     * <p>{@code reg_status_std} is classified first and {@code reg_status} is used as a fallback.
+     * When neither yields a label and {@code reg_status_std} is not blank, it is returned as is.
+     *
+     * @param reg_status     raw registration status; may be {@code null}
+     * @param reg_status_std previously standardized registration status; may be {@code null}
+     * @return the normalized label, the unchanged {@code reg_status_std}, or {@code null}
+     */
+    public String evaluate(String reg_status, String reg_status_std) {
+        String std = get_std(reg_status_std);
+        if (std == null) {
+            std = get_std(reg_status);
+        }
+        if (std == null && StringUtils.isNotBlank(reg_status_std)) {
+            return reg_status_std;
+        }
+        return std;
+    }
+
+    /**
+     * Maps a status text to its label by keyword, after stripping every character matched by
+     * {@link #pattern}. Earlier keyword groups win over later ones.
+     *
+     * @return the label, or {@code null} for empty input or when no keyword matches
+     */
+    private static String get_std(String str) {
+        if (StringUtils.isEmpty(str)) {
+            return null;
+        }
+        String cleaned = pattern.matcher(str).replaceAll("");
+        if (StringUtils.isEmpty(cleaned)) {
+            return null;
+        }
+        if (containsAny(cleaned, "在业", "存续", "正常", "注册", "开业", "成立中")) {
+            return "在业/存续";
+        }
+        if (cleaned.contains("吊销")) {
+            return "吊销";
+        }
+        if (cleaned.contains("注销")) {
+            return "注销";
+        }
+        // NOTE(review): "成立中" used to be listed here as well, but it could never match because
+        // the first group already claims it. Confirm which label it is meant to receive.
+        if (containsAny(cleaned, "申请中", "设立中")) {
+            return "筹建";
+        }
+        if (cleaned.contains("清算")) {
+            return "清算";
+        }
+        if (cleaned.contains("迁入")) {
+            return "迁入";
+        }
+        if (containsAny(cleaned, "迁他", "迁出")) {
+            return "迁出";
+        }
+        if (containsAny(cleaned, "停业", "停工", "歇业")) {
+            return "停业";
+        }
+        if (cleaned.contains("撤销")) {
+            return "撤销";
+        }
+        // Everything else, including the literal "其他", has no standard label.
+        return null;
+    }
+
+    /** Returns {@code true} when {@code text} contains at least one of {@code keywords}. */
+    private static boolean containsAny(String text, String... keywords) {
+        for (String keyword : keywords) {
+            if (text.contains(keyword)) {
+                return true;
+            }
+        }
+        return false;
+    }
+}