Browse Source

Merge remote-tracking branch 'origin/master'

xufei 2 years ago
parent
commit
2fc7c23782

+ 1 - 0
src/main/java/com/winhc/bigdata/udf/RegisteredCapitalTrim.java

@@ -10,6 +10,7 @@ import java.util.regex.Pattern;
  * @author: XuJiakai
  * 2020/11/2 10:16
  * 注册资本提取
+ * registered_capital_trim
  */
 public class RegisteredCapitalTrim extends UDF {
     private static boolean isDouble(String val) {

+ 6 - 0
src/main/java/com/winhc/bigdata/udf/V7CompanyCompare.java

@@ -24,4 +24,10 @@ public class V7CompanyCompare extends UDF {
     /**
      * Returns {@code defaultStr} when {@code str} is blank according to
      * {@code StringUtils.isBlank}; otherwise returns {@code str} unchanged.
      *
      * @param str        candidate value
      * @param defaultStr value to use when {@code str} is blank
      * @return {@code str}, or {@code defaultStr} if {@code str} is blank
      */
     private static String coalesce(String str, String defaultStr) {
         if (StringUtils.isBlank(str)) {
             return defaultStr;
         }
         return str;
     }
+
+    /**
+     * Ad-hoc manual check: calls {@code evaluate} once with a sample company
+     * name, a {@code null} second argument and two literal values, then prints
+     * whatever it returns to standard output.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        String result = new V7CompanyCompare().evaluate("某公司", null, "1", "2");
+        System.out.println(result);
+    }
 }

+ 19 - 0
src/main/java/com/winhc/bigdata/udf/etl/CompanyChangeRegisteredCapitalClassify.java

@@ -0,0 +1,19 @@
+package com.winhc.bigdata.udf.etl;
+
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.utils.CompanyChangeUtils;
+
+/**
+ * UDF that classifies a company change record, picking out registered-capital
+ * increases and decreases. All logic lives in
+ * {@link CompanyChangeUtils#changeType(String, String, String)}; this class only
+ * exposes it through an {@code evaluate} method.
+ *
+ * <p>Function name noted by the author: {@code company_change_rc_classify}.
+ *
+ * @author: XuJiakai
+ * 2022/5/19 09:35
+ */
+public class CompanyChangeRegisteredCapitalClassify extends UDF {
+
+    /**
+     * Classifies a single change record.
+     *
+     * @param changeItem    name of the changed item
+     * @param contentBefore content before the change
+     * @param contentAfter  content after the change
+     * @return the value produced by {@code CompanyChangeUtils.changeType}:
+     *         0 for a non-capital change, 1 for an increase, -1 for a decrease
+     */
+    public Integer evaluate(String changeItem, String contentBefore, String contentAfter) {
+        final int classification = CompanyChangeUtils.changeType(changeItem, contentBefore, contentAfter);
+        return classification;
+    }
+
+}

+ 80 - 0
src/main/java/com/winhc/bigdata/utils/CompanyChangeUtils.java

@@ -0,0 +1,80 @@
+package com.winhc.bigdata.utils;
+
+import com.alibaba.fastjson.JSONObject;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Decides from a company change record whether registered capital was
+ * increased or decreased.
+ *
+ * @author: XuJiakai
+ * 2022/5/18 09:31
+ */
+public class CompanyChangeUtils {
+
+    /** A change item containing any of these substrings is treated as capital-related. */
+    private static final List<String> CAPITAL_KEYWORDS = Arrays.asList(
+            "资本"
+            , "出资"
+            , "资金"
+            , "投资总额"
+    );
+
+    /**
+     * Classifies a change record as a capital increase, a capital decrease, or neither.
+     *
+     * <p>Only the leading numeric token of each value is compared (as extracted by
+     * {@code RegisteredCapitalTrim.registeredCapitalTrim}); unit or currency text
+     * after the number is not taken into account.
+     *
+     * @param changeItem   name of the changed item
+     * @param changeBefore content before the change
+     * @param changeAfter  content after the change
+     * @return 1 when the amount increased, -1 when it decreased, and 0 when the
+     *         item is not capital-related, either value does not start with a
+     *         digit or cannot be parsed, or both amounts are equal
+     */
+    public static int changeType(String changeItem, String changeBefore, String changeAfter) {
+        if (changeItem == null || changeItem.isEmpty()) {
+            return 0;
+        }
+        boolean capitalRelated = CAPITAL_KEYWORDS.stream().anyMatch(changeItem::contains);
+        if (!capitalRelated || !prefixIsNum(changeAfter) || !prefixIsNum(changeBefore)) {
+            return 0;
+        }
+        // prefixIsNum ignores surrounding whitespace, so trim here as well:
+        // the extractor returns null for a value with leading whitespace, which
+        // would silently turn a real change into 0.
+        String beforeVal = RegisteredCapitalTrim.registeredCapitalTrim(changeBefore.trim());
+        String afterVal = RegisteredCapitalTrim.registeredCapitalTrim(changeAfter.trim());
+        if (afterVal == null || beforeVal == null) {
+            return 0;
+        }
+        double after = Double.parseDouble(afterVal);
+        double before = Double.parseDouble(beforeVal);
+        // signum pins the result to exactly -1, 0 or 1 as documented.
+        return Integer.signum(Double.compare(after, before));
+    }
+
+    /**
+     * Tells whether the value, ignoring leading whitespace, starts with an ASCII digit.
+     *
+     * <p>Only the first character is inspected, so embedded line terminators
+     * (including {@code \r}) do not affect the result.
+     *
+     * @param value text to inspect, may be {@code null}
+     * @return {@code true} if the first non-whitespace character is 0-9
+     */
+    private static boolean prefixIsNum(String value) {
+        if (value == null) {
+            return false;
+        }
+        String trimmed = value.trim();
+        if (trimmed.isEmpty()) {
+            return false;
+        }
+        char first = trimmed.charAt(0);
+        return first >= '0' && first <= '9';
+    }
+
+    /**
+     * Ad-hoc manual check: classifies one sample record parsed from JSON and
+     * prints the result, then prints the prefix check for a multi-line value.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        String json = "{\"content_after\":\"1033.100000\\n\\n840\\n\\n\",\"company_name\":\"兰州正大有限公司\",\"rowkey\":\"15c1f92d96956035d509b65a71431d98\",\"content_before\":\"6380.000000人民币\",\"change_item\":\"投资总额\"}";
+        JSONObject value = JSONObject.parseObject(json);
+
+        String changeItem = value.getString("change_item");
+        String changeBefore = value.getString("content_before");
+        String changeAfter = value.getString("content_after");
+
+        int i = CompanyChangeUtils.changeType(changeItem, changeBefore, changeAfter);
+        System.out.println(i);
+
+        System.out.println(prefixIsNum("1033.100000\n" +
+                "\n" +
+                "840\n" +
+                "\n"));
+    }
+
+}

+ 47 - 0
src/main/java/com/winhc/bigdata/utils/RegisteredCapitalTrim.java

@@ -0,0 +1,47 @@
+package com.winhc.bigdata.utils;
+
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * @author: XuJiakai
+ * 2020/11/2 10:16
+ * 注册资本提取
+ * registered_capital_trim
+ */
+public class RegisteredCapitalTrim {
+    private static boolean isDouble(String val) {
+        try {
+            Double.parseDouble(val);
+            return true;
+        } catch (Exception e) {
+            return false;
+        }
+    }
+
+    private static final Pattern pattern = Pattern.compile("[^0-9.]");
+
+    public static String registeredCapitalTrim(String val) {
+        if (StringUtils.isEmpty(val)) {
+            return null;
+        }
+        if (val.contains("%")) {
+            return null;
+        }
+        Matcher matcher = pattern.matcher(val);
+        try {
+            String a = matcher.replaceAll(" ").split("\\s+")[0];
+            return StringUtils.isNotBlank(a) ? isDouble(a) ? a : null : null;
+        } catch (ArrayIndexOutOfBoundsException exception) {
+            return null;
+        }
+    }
+
+    public static void main(String[] args) {
+        String a = registeredCapitalTrim("50万元人民币");
+        System.out.println(a);
+    }
+
+}