瀏覽代碼

Merge branch 'master' of http://139.224.213.4:3000/bigdata/UDF_Max

# Conflicts:
#	src/main/java/com/winhc/bigdata/udf/CompanyOrgTypeNew.java
JimZhang 2 年之前
父節點
當前提交
52311ec3b6

+ 64 - 0
src/main/java/com/winhc/bigdata/udf/CompanyFinancingRoundStd.java

@@ -0,0 +1,64 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * UDF that maps a free-text financing round description onto a fixed set of labels.
+ *
+ * @author π
+ * @date 2022/6/9 9:25
+ */
+public class CompanyFinancingRoundStd extends UDF {
+
+    /** Removes every character that is neither an ASCII letter nor one of the recognised keyword characters. */
+    private static final Pattern FINANCING_ALLOW = Pattern.compile("[^a-zA-Z种子轮/天使定向增发战略融资并购]");
+    /** Matches "ipo" in any letter case, so mixed-case spellings such as "Ipo" are recognised too. */
+    private static final Pattern FINANCING_IPO = Pattern.compile("ipo", Pattern.CASE_INSENSITIVE);
+    /** Any letter from E to Z in either case; used as the catch-all for round E and later. */
+    private static final Pattern FINANCING_UP_E = Pattern.compile("[e-zE-Z]");
+
+    /**
+     * Normalises a financing round description.
+     *
+     * <p>The input is first reduced to ASCII letters and the recognised keyword characters. The
+     * rules are then checked in a fixed order and the first one that matches wins: letters A, B,
+     * C, D (either case, anywhere in the text), IPO, the Chinese keywords, and finally any letter
+     * from E to Z.
+     *
+     * @param financing raw financing round text
+     * @return {@code null} when the input is null or empty; otherwise one of "A轮", "B轮", "C轮",
+     *         "D轮", "IPO", "种子轮/天使轮", "定向增发", "战略融资", "并购", "E轮及以上" or "其他"
+     */
+    public String evaluate(String financing) {
+        if (StringUtils.isEmpty(financing)) {
+            return null;
+        }
+        String financingStd = FINANCING_ALLOW.matcher(financing).replaceAll("");
+        if (StringUtils.isEmpty(financingStd)) {
+            return "其他";
+        }
+        if (financingStd.contains("A") || financingStd.contains("a")) {
+            return "A轮";
+        }
+        if (financingStd.contains("B") || financingStd.contains("b")) {
+            return "B轮";
+        }
+        if (financingStd.contains("C") || financingStd.contains("c")) {
+            return "C轮";
+        }
+        if (financingStd.contains("D") || financingStd.contains("d")) {
+            return "D轮";
+        }
+        // Case-insensitive on purpose: matching only "IPO"/"ipo" let spellings such as "Ipo"
+        // fall through to the E-Z catch-all below and be reported as "E轮及以上".
+        if (FINANCING_IPO.matcher(financingStd).find()) {
+            return "IPO";
+        }
+        if (financingStd.contains("天使") || financingStd.contains("种子")) {
+            return "种子轮/天使轮";
+        }
+        if (financingStd.contains("定向") || financingStd.contains("增发")) {
+            return "定向增发";
+        }
+        if (financingStd.contains("战略")) {
+            return "战略融资";
+        }
+        if (financingStd.contains("并购")) {
+            return "并购";
+        }
+        // Must stay last: I, P and O are themselves inside E-Z.
+        if (FINANCING_UP_E.matcher(financingStd).find()) {
+            return "E轮及以上";
+        }
+        return "其他";
+    }
+
+    /** Ad-hoc manual check: prints the label computed for one sample input. */
+    public static void main(String[] args) {
+        CompanyFinancingRoundStd l = new CompanyFinancingRoundStd();
+        System.out.println(l.evaluate("IPO上市"));
+    }
+
+}

+ 1 - 0
src/main/java/com/winhc/bigdata/udf/RegisteredCapitalTrim.java

@@ -10,6 +10,7 @@ import java.util.regex.Pattern;
  * @author: XuJiakai
  * 2020/11/2 10:16
  * 注册资本提取
+ * registered_capital_trim
  */
 public class RegisteredCapitalTrim extends UDF {
     private static boolean isDouble(String val) {

+ 6 - 0
src/main/java/com/winhc/bigdata/udf/V7CompanyCompare.java

@@ -24,4 +24,10 @@ public class V7CompanyCompare extends UDF {
     /** Returns {@code defaultStr} when {@code str} is blank according to StringUtils.isBlank, otherwise {@code str}. */
     private static String coalesce(String str, String defaultStr) {
         if (StringUtils.isBlank(str)) {
             return defaultStr;
         }
         return str;
     }
+
+    /** Ad-hoc manual check: prints the result of evaluate for one sample input. */
+    public static void main(String[] args) {
+        System.out.println(new V7CompanyCompare().evaluate("某公司", null, "1", "2"));
+    }
 }

+ 19 - 0
src/main/java/com/winhc/bigdata/udf/etl/CompanyChangeRegisteredCapitalClassify.java

@@ -0,0 +1,19 @@
+package com.winhc.bigdata.udf.etl;
+
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.utils.CompanyChangeUtils;
+
+/**
+ * Classifies company change records to find registered-capital increases and decreases.
+ *
+ * company_change_rc_classify (presumably the function name this UDF is registered under)
+ *
+ * @author: XuJiakai
+ * 2022/5/19 09:35
+ */
+public class CompanyChangeRegisteredCapitalClassify extends UDF {
+
+    /**
+     * Delegates to {@code CompanyChangeUtils.changeType}.
+     *
+     * @param changeItem    name of the changed item
+     * @param contentBefore content before the change
+     * @param contentAfter  content after the change
+     * @return the value computed by {@code CompanyChangeUtils.changeType} for these arguments
+     */
+    public Integer evaluate(String changeItem, String contentBefore, String contentAfter) {
+        final int classification = CompanyChangeUtils.changeType(changeItem, contentBefore, contentAfter);
+        return classification;
+    }
+
+}

+ 13 - 6
src/main/java/com/winhc/bigdata/udf/id_trans.java

@@ -2,6 +2,7 @@ package com.winhc.bigdata.udf;
 
 import com.aliyun.odps.udf.UDF;
 import com.aliyun.odps.utils.StringUtils;
+import com.winhc.bigdata.utils.IDCardUtil;
 
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -12,15 +13,20 @@ import java.util.regex.Pattern;
  */
 public class id_trans extends UDF {
 
+    private static final Pattern pattern = Pattern.compile("[^0-9xX]");
+
+
     public String evaluate(String id) {
         if (StringUtils.isBlank(id)) {
             return null;
         }
-        if (isNumeric(id.substring(0,id.length() - 1))) {
-            if (id.length() == 18) {
-                return id.replaceAll("\\*","X");
-            } else if (id.length() == 15) {
-                return trans15bitTo18bit(id);
+        String id_tmp = id.replaceAll("\\*", "X");
+        id_tmp = pattern.matcher(id_tmp).replaceAll("");
+        if (IDCardUtil.idCardValidate(id_tmp)) {
+            if (id_tmp.length() == 18) {
+                return id_tmp;
+            } else if (id_tmp.length() == 15) {
+                return trans15bitTo18bit(id_tmp);
             }
         }
         return null;
@@ -101,7 +107,8 @@ public class id_trans extends UDF {
     }
 
     public static void main(String[] args) {
-        String id = "43010219570921303*";
+        // Ad-hoc manual check. The first sample is overwritten on the next line, so only the
+        // 15-digit sample followed by " @" is actually passed to evaluate.
+        String id = "310102197706114820  ";
+        id = "310109410901321 @";
         id_trans n = new id_trans();
         System.out.println(n.evaluate(id));
     }

+ 80 - 0
src/main/java/com/winhc/bigdata/utils/CompanyChangeUtils.java

@@ -0,0 +1,80 @@
+package com.winhc.bigdata.utils;
+
+import com.alibaba.fastjson.JSONObject;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Decides from a company change record whether registered capital was increased or decreased.
+ *
+ * @author: XuJiakai
+ * 2022/5/18 09:31
+ */
+public class CompanyChangeUtils {
+
+    /** Substrings that mark a change item as capital-related. */
+    private static final List<String> CAPITAL_KEYWORDS = Arrays.asList(
+            "资本"
+            , "出资"
+            , "资金"
+            , "投资总额"
+    );
+
+    /** Matches a single ASCII digit at the very start of the input. */
+    private static final Pattern LEADING_DIGIT = Pattern.compile("^[0-9]");
+
+    /**
+     * Classifies one change record.
+     *
+     * @param changeItem   name of the changed item; must contain one of the capital keywords
+     * @param changeBefore content before the change
+     * @param changeAfter  content after the change
+     * @return 1 when the amount after the change is larger (capital increase), -1 when it is
+     *         smaller (capital decrease), and 0 when the item is not capital-related, either
+     *         value does not start with a digit or cannot be parsed, or both amounts are equal
+     */
+    public static int changeType(String changeItem, String changeBefore, String changeAfter) {
+        if (changeItem == null || changeItem.length() == 0) {
+            return 0;
+        }
+        boolean capitalRelated = CAPITAL_KEYWORDS.stream().anyMatch(changeItem::contains);
+        if (!capitalRelated || !prefixIsNum(changeAfter) || !prefixIsNum(changeBefore)) {
+            return 0;
+        }
+        // prefixIsNum ignores surrounding whitespace, so hand the trimmed text to the extractor
+        // as well; it only reads the first token and would otherwise reject a leading blank.
+        String contentBeforeVal = RegisteredCapitalTrim.registeredCapitalTrim(changeBefore.trim());
+        String contentAfterVal = RegisteredCapitalTrim.registeredCapitalTrim(changeAfter.trim());
+        if (contentAfterVal == null || contentBeforeVal == null) {
+            return 0;
+        }
+        return Double.compare(Double.parseDouble(contentAfterVal), Double.parseDouble(contentBeforeVal));
+    }
+
+    /**
+     * Tells whether the value, ignoring leading and trailing whitespace, starts with an ASCII digit.
+     * Only the first character is inspected, so line breaks of any kind later in the value
+     * (including {@code \r\n}) do not affect the result.
+     */
+    private static boolean prefixIsNum(String value) {
+        if (value == null) {
+            return false;
+        }
+        Matcher matcher = LEADING_DIGIT.matcher(value.trim());
+        return matcher.lookingAt();
+    }
+
+
+    /** Ad-hoc manual check: classifies one sample record and prints the intermediate results. */
+    public static void main(String[] args) {
+        String json = "{\"content_after\":\"1033.100000\\n\\n840\\n\\n\",\"company_name\":\"兰州正大有限公司\",\"rowkey\":\"15c1f92d96956035d509b65a71431d98\",\"content_before\":\"6380.000000人民币\",\"change_item\":\"投资总额\"}";
+        JSONObject value = JSONObject.parseObject(json);
+
+        String changeItem = value.getString("change_item");
+        String changeBefore = value.getString("content_before");
+        String changeAfter = value.getString("content_after");
+
+        int i = CompanyChangeUtils.changeType(changeItem, changeBefore, changeAfter);
+        System.out.println(i);
+
+        System.out.println(prefixIsNum("1033.100000\n" +
+                "\n" +
+                "840\n" +
+                "\n"));
+//        System.out.println(changeType("册资本", "a1000", "200"));
+    }
+
+}

File diff suppressed because it is too large
+ 166 - 0
src/main/java/com/winhc/bigdata/utils/IDCardUtil.java


+ 47 - 0
src/main/java/com/winhc/bigdata/utils/RegisteredCapitalTrim.java

@@ -0,0 +1,47 @@
+package com.winhc.bigdata.utils;
+
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Extracts the leading numeric amount from a registered-capital string.
+ * registered_capital_trim
+ *
+ * @author: XuJiakai
+ * 2020/11/2 10:16
+ */
+public class RegisteredCapitalTrim {
+
+    private static final Pattern pattern = Pattern.compile("[^0-9.]");
+
+    /** Tells whether {@code val} can be parsed by {@code Double.parseDouble}. */
+    private static boolean isDouble(String val) {
+        try {
+            Double.parseDouble(val);
+        } catch (Exception e) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Returns the first run of digits and dots in {@code val} when it parses as a number.
+     *
+     * <p>Every character other than a digit or a dot is turned into a space and the first
+     * whitespace-separated token is taken. Because of that, a value that starts with any other
+     * character produces an empty first token and yields {@code null}.
+     *
+     * @param val raw registered-capital text
+     * @return the leading numeric token, or {@code null} when {@code val} is null or empty,
+     *         contains {@code %}, has no leading numeric token, or that token is not a valid number
+     */
+    public static String registeredCapitalTrim(String val) {
+        if (StringUtils.isEmpty(val) || val.contains("%")) {
+            return null;
+        }
+        Matcher nonNumeric = pattern.matcher(val);
+        String[] tokens = nonNumeric.replaceAll(" ").split("\\s+");
+        // split drops trailing empty strings, so input without any digit or dot gives no tokens
+        if (tokens.length == 0) {
+            return null;
+        }
+        String first = tokens[0];
+        if (StringUtils.isNotBlank(first) && isDouble(first)) {
+            return first;
+        }
+        return null;
+    }
+
+    /** Ad-hoc manual check: prints the amount extracted from one sample value. */
+    public static void main(String[] args) {
+        String a = registeredCapitalTrim("50万元人民币");
+        System.out.println(a);
+    }
+
+}