Quellcode durchsuchen

Merge remote-tracking branch 'origin/master'

许家凯 vor 1 Jahr
Ursprung
Commit
972c5b8de0
45 geänderte Dateien mit 34361 neuen und 9 gelöschten Zeilen
  1. 40 0
      src/main/java/com/winhc/bigdata/udf/CaseNoJudge.java
  2. 31 0
      src/main/java/com/winhc/bigdata/udf/CleanupNotNumber.java
  3. 284 0
      src/main/java/com/winhc/bigdata/udf/CompanyBidTrans.java
  4. 117 0
      src/main/java/com/winhc/bigdata/udf/CompanyCase.java
  5. 52 0
      src/main/java/com/winhc/bigdata/udf/CompanyMonitorTrans.java
  6. 113 0
      src/main/java/com/winhc/bigdata/udf/CompanyTmTrans.java
  7. 25 0
      src/main/java/com/winhc/bigdata/udf/Content2Csv.java
  8. 36 0
      src/main/java/com/winhc/bigdata/udf/ContentParseError.java
  9. 64 0
      src/main/java/com/winhc/bigdata/udf/CourtName2Province.java
  10. 33 0
      src/main/java/com/winhc/bigdata/udf/CsvTrans.java
  11. 52 0
      src/main/java/com/winhc/bigdata/udf/DataRemoveKeys.java
  12. 55 0
      src/main/java/com/winhc/bigdata/udf/GetCapitalActualAmt.java
  13. 52 0
      src/main/java/com/winhc/bigdata/udf/GetCaseAmt.java
  14. 101 0
      src/main/java/com/winhc/bigdata/udf/GetCompanyChangeInfo.java
  15. 129 0
      src/main/java/com/winhc/bigdata/udf/GetCompanyPatentNewInfo.java
  16. 95 0
      src/main/java/com/winhc/bigdata/udf/GetCompletionAcceptanceInfo.java
  17. 100 0
      src/main/java/com/winhc/bigdata/udf/GetConstructionBidInfo.java
  18. 98 0
      src/main/java/com/winhc/bigdata/udf/GetConstructionContractInfo.java
  19. 112 0
      src/main/java/com/winhc/bigdata/udf/GetConstructionPermitInfo.java
  20. 127 0
      src/main/java/com/winhc/bigdata/udf/GetConstructionProjectDetailInfo.java
  21. 48 0
      src/main/java/com/winhc/bigdata/udf/GetHolderInfo.java
  22. 79 0
      src/main/java/com/winhc/bigdata/udf/GetParticipatingUnitsInfo.java
  23. 101 0
      src/main/java/com/winhc/bigdata/udf/GetWorkingDrawingInfo.java
  24. 38 0
      src/main/java/com/winhc/bigdata/udf/HistoryNameSplit.java
  25. 20 0
      src/main/java/com/winhc/bigdata/udf/Json2Str.java
  26. 54 0
      src/main/java/com/winhc/bigdata/udf/Name2Lawyer.java
  27. 32 0
      src/main/java/com/winhc/bigdata/udf/NameCleanupV2.java
  28. 40 0
      src/main/java/com/winhc/bigdata/udf/RelationSplit.java
  29. 72 0
      src/main/java/com/winhc/bigdata/udf/ToProvinceCode.java
  30. 75 0
      src/main/java/com/winhc/bigdata/udf/UrlMatch.java
  31. 84 0
      src/main/java/com/winhc/bigdata/udf/finance_info_update.java
  32. 43 0
      src/main/java/com/winhc/bigdata/udf/get_content.java
  33. 51 0
      src/main/java/com/winhc/bigdata/udf/get_content_update.java
  34. 44 0
      src/main/java/com/winhc/bigdata/udf/get_info_company.java
  35. 1 2
      src/main/java/com/winhc/bigdata/udf/get_legal_v2.java
  36. 33 0
      src/main/java/com/winhc/bigdata/udf/get_legal_v3.java
  37. 62 0
      src/main/java/com/winhc/bigdata/udf/get_sqrinfo.java
  38. 2 3
      src/main/java/com/winhc/bigdata/udf/get_url.java
  39. 41 0
      src/main/java/com/winhc/bigdata/udf/get_url2.java
  40. 75 0
      src/main/java/com/winhc/bigdata/udf/judge_number.java
  41. 71 0
      src/main/java/com/winhc/bigdata/udf/judrisk/ExtractCompanyCase.java
  42. 46 0
      src/main/java/com/winhc/bigdata/udf/number_parse.java
  43. 32 0
      src/main/java/com/winhc/bigdata/udf/reg_capital_update.java
  44. 9 4
      src/main/java/com/winhc/bigdata/udf/split_names.java
  45. 31492 0
      src/main/resources/all_court.txt

+ 40 - 0
src/main/java/com/winhc/bigdata/udf/CaseNoJudge.java

@@ -0,0 +1,40 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Date: 2023/1/30 16:26
+ * @Description: ODPS UDF that screens a case-number string. The original notes list
+ * three rejection criteria: long Chinese text, the characters 刑/民/行/赔/执/号 occurring
+ * more than once, and the presence of punctuation.
+ */
+public class CaseNoJudge extends UDF {
+    /** Matches every character outside the range U+4E00..U+9FA5. */
+    private static final Pattern NON_CHINESE = Pattern.compile("[^\\u4e00-\\u9fa5]");
+    /** Matches input in which a keyword (or keyword group) shows up at least twice. */
+    private static final Pattern REPEATED_KEYWORD = Pattern.compile(".*[刑行赔].*?[刑行赔].*|.*[号].*?[号].*|.*[民].*?[民].*|.*[执].*?[执].*");
+
+    /** Matches any character that is not in the allowed set of the screening rule. */
+    private static final Pattern DISALLOWED_CHAR = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z()()]");
+
+
+    /**
+     * Screens a case number.
+     *
+     * @param caseNo the raw case number, may be null or blank
+     * @return {@code true} for blank input; {@code false} when the input holds 10 or more
+     * characters in U+4E00..U+9FA5 or when the repeated-keyword pattern matches the whole
+     * input; otherwise {@code true} only if no disallowed character is present
+     */
+    public Boolean evaluate(String caseNo) {
+        if (StringUtils.isBlank(caseNo)) {
+            return true;
+        }
+
+        int chineseCount = NON_CHINESE.matcher(caseNo).replaceAll("").length();
+        if (chineseCount >= 10 || REPEATED_KEYWORD.matcher(caseNo).matches()) {
+            return false;
+        }
+
+        // Equivalent to "removing the disallowed characters leaves the length unchanged".
+        return !DISALLOWED_CHAR.matcher(caseNo).find();
+    }
+
+    /** Manual smoke run that prints the verdict for three sample case numbers. */
+    public static void main(String[] args) {
+        CaseNoJudge judge = new CaseNoJudge();
+        String[] samples = {
+                "穗劳人仲案(2019) 228号",
+                "(2020)粤5122刑初185、(2020)粤51刑终114号",
+                "(2020)粤5122事185、(2020)粤51刑终114号"
+        };
+        for (String sample : samples) {
+            System.out.println(judge.evaluate(sample));
+        }
+    }
+}

+ 31 - 0
src/main/java/com/winhc/bigdata/udf/CleanupNotNumber.java

@@ -0,0 +1,31 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+import java.math.BigDecimal;
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ * @Description: ODPS UDF that strips everything except digits and dots from a string
+ * and renders what is left as a plain decimal number.
+ */
+public class CleanupNotNumber extends UDF {
+    /** Matches every character that is neither an ASCII digit nor a dot. */
+    private static final Pattern NON_NUMERIC = Pattern.compile("[^0-9.]");
+
+    /**
+     * Extracts the numeric part of a string.
+     *
+     * @param s the raw text, may be null or blank
+     * @return the remaining digits/dots parsed as a {@link BigDecimal}, with trailing zeros
+     * removed and printed without exponent; the empty string when the input is blank or the
+     * remainder is not a parsable number
+     */
+    public String evaluate(String s) {
+        if (StringUtils.isBlank(s)) {
+            return "";
+        }
+        String numericText = NON_NUMERIC.matcher(s).replaceAll("");
+        BigDecimal value;
+        try {
+            value = new BigDecimal(numericText);
+        } catch (Exception ignored) {
+            // Not a number (e.g. empty remainder or several dots): report "no value".
+            return "";
+        }
+        return value.stripTrailingZeros().toPlainString();
+    }
+
+    /** Manual smoke run with one sample amount. */
+    public static void main(String[] args) {
+        CleanupNotNumber cleaner = new CleanupNotNumber();
+        String sample = "38000.00000000万元人民币元";
+        System.out.println(cleaner.evaluate(sample));
+    }
+}

+ 284 - 0
src/main/java/com/winhc/bigdata/udf/CompanyBidTrans.java

@@ -0,0 +1,284 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.fastjson.serializer.SerializerFeature;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.*;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+
+/**
+ * @Author: π
+ * @Date: 2023/7/27 16:26
+ * 招投标转换
+ * <p>
+ * [{"company_id":"2c9d7aec5ac48c1f505b7cc74123b8f1","name":"西安市公路工程管理处"},{"company_id":"cec3be99b8395225c4c7519f2416f4ee","name":"正衡工程项目管理有限公司"}]
+ * 3297737385;3097206773
+ * [{"gid":"3097206773","name":"西安市公路工程管理处"}]
+ * 招标公告
+ */
+public class CompanyBidTrans extends UDF {
+
+    public String evaluate(String wtbamtInfo) {
+        if (StringUtils.isBlank(wtbamtInfo)) return null;
+
+        List<JSONObject> re = JSON.parseArray(wtbamtInfo)
+                .toJavaList(JSONObject.class)
+                .stream()
+                .peek(j -> j.fluentPut("project_bid_money", ""))
+                .collect(Collectors.toList());
+        return JSONObject.toJSONString(re);
+    }
+
+    public List<String> evaluate(String purchaser, String proxy) {
+        Set<String> set = new HashSet<>();
+        if (StringUtils.isNotBlank(purchaser)) {
+            if (purchaser.startsWith("[{")) {
+                JSONArray ja = new JSONArray();
+                try {
+                    ja = JSON.parseArray(purchaser);
+                } catch (Exception e) {
+
+                }
+                List<String> re = ja.toJavaList(JSONObject.class)
+                        .stream()
+                        .map(s -> s.getString("name"))
+                        .filter(Objects::nonNull)
+                        .distinct().collect(Collectors.toList());
+                set.addAll(re);
+            } else if (purchaser.startsWith("[[{")) {
+                JSONArray ja = new JSONArray();
+                try {
+                    ja = JSON.parseArray(purchaser);
+                } catch (Exception e) {
+
+                }
+                List<String> re = ja.toJavaList(JSONArray.class).stream()
+                        .flatMap(jj1 -> jj1.toJavaList(JSONObject.class).stream()
+                                .map(s -> s.getString("name"))
+                                .filter(Objects::nonNull).
+                                distinct()).collect(Collectors.toList());
+
+                set.addAll(re);
+            } else {
+                List<String> re2 = Arrays.stream(purchaser.split("、"))
+                        .filter(Objects::nonNull)
+                        .distinct().collect(Collectors.toList());
+                set.addAll(re2);
+            }
+        }
+
+        if (StringUtils.isNotBlank(proxy)) {
+            List<String> re3 = Arrays.stream(proxy.split("、"))
+                    .filter(Objects::nonNull)
+                    .distinct().collect(Collectors.toList());
+            set.addAll(re3);
+        }
+
+        return new ArrayList<>(set);
+    }
+
+    public String evaluate(String datas, String purchaser, String proxy, String type, String title) {
+
+        JSONArray ej = new JSONArray();
+
+        Map<String, JSONObject> names = Optional.ofNullable(JSON.parseArray(datas))
+                .orElse(new JSONArray())
+                .toJavaList(JSONObject.class).stream()
+                .filter(Objects::nonNull).collect(Collectors.toMap(t -> t.getString("name"), t -> t, (n, o) -> o));
+
+        List<String> nameList = new ArrayList<>();
+        AtomicReference<Integer> companyNumber = new AtomicReference<>(0);
+        List<JSONObject> purchaserList = new ArrayList<>();
+        if (StringUtils.isNotBlank(purchaser)) {
+            if (purchaser.startsWith("[{")) {
+                JSONArray ja = new JSONArray();
+                try {
+                    ja = JSON.parseArray(purchaser);
+                } catch (Exception e) {
+
+                }
+                ja.toJavaList(JSONObject.class)
+                        .stream()
+                        .filter(Objects::nonNull)
+                        .forEach(d -> {
+                            String name = d.getString("name");
+                            String keyno = "";
+                            if (names.containsKey(name)) {
+                                keyno = names.get(name).getString("company_id");
+                            }
+                            JSONObject re = new JSONObject()
+                                    .fluentPut("name", name)
+                                    .fluentPut("keyno", keyno)
+                                    .fluentPut("contacts", ej)
+                                    .fluentPut("phones", ej)
+                                    .fluentPut("address", ej)
+                                    .fluentPut("email", ej);
+                            purchaserList.add(re);
+                            nameList.add(name);
+                            companyNumber.updateAndGet(v -> v + 1);
+                        });
+            } else if (purchaser.startsWith("[[{")) {
+                JSONArray ja = new JSONArray();
+                try {
+                    ja = JSON.parseArray(purchaser);
+                } catch (Exception e) {
+
+                }
+
+                ja.toJavaList(JSONArray.class).stream()
+                        .flatMap(jj1 -> jj1.toJavaList(JSONObject.class).stream()
+                                .filter(Objects::nonNull).
+                                distinct()).forEach(d -> {
+                            String name = d.getString("name");
+                            String keyno = "";
+                            if (names.containsKey(name)) {
+                                keyno = names.get(name).getString("company_id");
+                            }
+                            JSONObject re = new JSONObject()
+                                    .fluentPut("name", name)
+                                    .fluentPut("keyno", keyno)
+                                    .fluentPut("contacts", ej)
+                                    .fluentPut("phones", ej)
+                                    .fluentPut("address", ej)
+                                    .fluentPut("email", ej);
+                            purchaserList.add(re);
+                            nameList.add(name);
+                            companyNumber.updateAndGet(v -> v + 1);
+                        });
+
+            } else {
+                Arrays.stream(purchaser.split("、"))
+                        .filter(Objects::nonNull)
+                        .forEach(d1 -> {
+                            String keyno = "";
+                            if (names.containsKey(d1)) {
+                                keyno = names.get(d1).getString("company_id");
+                            }
+                            JSONObject re = new JSONObject()
+                                    .fluentPut("name", d1)
+                                    .fluentPut("keyno", keyno)
+                                    .fluentPut("contacts", ej)
+                                    .fluentPut("phones", ej)
+                                    .fluentPut("address", ej)
+                                    .fluentPut("email", ej);
+                            purchaserList.add(re);
+                            nameList.add(d1);
+                            companyNumber.updateAndGet(v -> v + 1);
+                        });
+            }
+        }
+        //代理机构
+        List<JSONObject> proxyList = new ArrayList<>();
+        if (StringUtils.isNotBlank(proxy)) {
+            Arrays.stream(proxy.split("、"))
+                    .filter(Objects::nonNull)
+                    .forEach(d1 -> {
+                        String keyno = "";
+                        if (names.containsKey(d1)) {
+                            keyno = names.get(d1).getString("company_id");
+                        }
+                        JSONObject re = new JSONObject()
+                                .fluentPut("name", d1)
+                                .fluentPut("keyno", keyno)
+                                .fluentPut("contacts", ej)
+                                .fluentPut("phones", ej)
+                                .fluentPut("address", ej)
+                                .fluentPut("email", ej);
+                        proxyList.add(re);
+                        nameList.add(d1);
+                        companyNumber.updateAndGet(v -> v + 1);
+                    });
+        }
+        //类型
+        String main_type = "";
+        if (StringUtils.isNotBlank(type)) {
+            if (type.contains("招标") || type.contains("采购") || type.contains("更正公告") || type.contains("交易公告")) {
+                main_type = "20";
+            } else if (type.contains("中标") || type.contains("结果公示") || type.contains("成交")) {
+                main_type = "30";
+            } else {
+                main_type = "0";
+            }
+        }
+        //投标单位,中标单位
+        List<JSONObject> supplierInfoList = new ArrayList<>();
+        List<JSONObject> wtbamtInfoList = new ArrayList<>();
+        List<JSONObject> winnerCandidateList = new ArrayList<>();
+
+        AtomicReference<Integer> identity_code = new AtomicReference<>(0);  // 默认为投标单位
+        if (main_type.equalsIgnoreCase("30")) {
+            if (names.size() - companyNumber.get() == 1) {
+                identity_code.set(1);  // 中标单位
+            }
+        }
+        if (title.contains("候选")) {
+            identity_code.set(2);  // 候选单位
+        }
+        names.forEach((key, data) -> {
+            if (!nameList.contains(key)) {
+                if (identity_code.get() == 0) {
+                    JSONObject re = new JSONObject()
+                            .fluentPut("name", key)
+                            .fluentPut("keyno", data.getString("company_id"))
+                            .fluentPut("contacts", ej)
+                            .fluentPut("phones", ej)
+                            .fluentPut("address", ej)
+                            .fluentPut("email", ej);
+                    supplierInfoList.add(re);
+                } else if (identity_code.get() == 2) {
+                    JSONObject re = new JSONObject()
+                            .fluentPut("name", key)
+                            .fluentPut("keyno", data.getString("company_id"))
+                            .fluentPut("contacts", ej)
+                            .fluentPut("phones", ej)
+                            .fluentPut("address", ej)
+                            .fluentPut("email", ej);
+                    winnerCandidateList.add(re);
+                } else {
+                    JSONObject re = new JSONObject()
+                            .fluentPut("name", key)
+                            .fluentPut("keyno", data.getString("company_id"))
+                            .fluentPut("contacts", ej)
+                            .fluentPut("phones", ej)
+                            .fluentPut("address", ej)
+                            .fluentPut("project_bid_money", "")
+                            .fluentPut("email", ej);
+                    wtbamtInfoList.add(re);
+                }
+            }
+
+        });
+        String purchaser_keyno = null;
+        if (purchaserList.size() > 0) {
+            purchaser_keyno = purchaserList.get(0).getString("keyno");
+        }
+        JSONObject j = new JSONObject()
+                .fluentPut("purchaser_keyno", purchaser_keyno)
+                .fluentPut("main_type", main_type)
+                .fluentPut("purchaser_info", purchaserList)
+                .fluentPut("proxy", proxyList)
+                .fluentPut("wtbamt_info", wtbamtInfoList)
+                .fluentPut("winner_candidate", winnerCandidateList)
+                .fluentPut("supplier_info", supplierInfoList);
+
+        return JSONObject.toJSONString(j, SerializerFeature.WriteMapNullValue);
+    }
+
+    public static void main(String[] args) {
+        String datas = "[{\"company_id\":\"123456\",\"name\":\"亳州学院\"},{\"company_id\":\"6e5d52d7bf1cbbac6c3b820dfc8cf0eb\",\"name\":\"亳州市诚信文化体育用品销售有限公司\"},{\"company_id\":\"efaa5ad30ec4e21ed19aae202eeeda29\",\"name\":\"亳州市谯城区信诚商贸有限公司\"}]";
+        String purchaser = "[[{\"gid\":\"2358397210\",\"name\":\"亳州学院\"},{\"gid\":\"2358397210\",\"name\":\"亳州学院\"}]]";
+        String proxy = "";
+        String type = "中标结果";
+        String title = "亳州学院于2018年12月03日成功交易2笔新订单";
+        CompanyBidTrans j = new CompanyBidTrans();
+        String re = j.evaluate(datas, purchaser, proxy, type, title);
+        System.out.println(re);
+        System.out.println(j.evaluate("[{\"address\":[],\"contacts\":[],\"email\":[],\"keyno\":\"852f69e1c4cd443ca6d7347b9c503db4\",\"name\":\"包头市仁武汽修有限责任公司\",\"phones\":[],\"project_bid_money\":[]}]"));
+        System.out.println(j.evaluate(purchaser, proxy));
+    }
+}

+ 117 - 0
src/main/java/com/winhc/bigdata/udf/CompanyCase.java

@@ -0,0 +1,117 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+public class CompanyCase extends UDF {
+    // TODO define parameters and return type, e.g:  public String evaluate(String a, String b)
+    public String evaluate(String companyId, String ygName, String bgName) {
+        String ret = null;
+
+        if (ygName != null && ygName.contains(companyId)) {
+            ret = "Y";
+        } else if (bgName != null && bgName.contains(companyId)) {
+            ret = "B";
+        } else {
+            ret = "T";
+        }
+        return ret;
+    }
+
+
+    public String evaluate(String companyId, String ygName, String bgName, String isSuccess) {
+        String ret = null;
+
+        if (isSuccess != null) {
+            if (ygName != null && ygName.contains(companyId)) {
+                if (isSuccess.equals("胜")) {
+                    ret = "胜";
+                } else if (isSuccess.equals("负")) {
+                    ret = "负";
+                } else if (isSuccess.equals("平")) {
+                    ret = "平";
+                } else {
+                    ret = "无";
+                }
+            } else if (bgName != null && bgName.contains(companyId)) {
+                if (isSuccess.equals("胜")) {
+                    ret = "负";
+                } else if (isSuccess.equals("负")) {
+                    ret = "胜";
+                } else if (isSuccess.equals("平")) {
+                    ret = "平";
+                } else {
+                    ret = "无";
+                }
+            } else {
+                ret = "无";
+            }
+        } else {
+            ret = "无";
+        }
+        return ret;
+    }
+
+    public String evaluate(String case_role, String isSuccess) {
+        String ret;
+        if (StringUtils.isBlank(isSuccess)) {
+            return "无";
+        }
+        if (case_role.equals("Y")) {
+            switch (isSuccess) {
+                case "胜":
+                    ret = "胜";
+                    break;
+                case "负":
+                    ret = "负";
+                    break;
+                case "平":
+                    ret = "平";
+                    break;
+                default:
+                    ret = "无";
+                    break;
+            }
+        } else if (case_role.equals("B")) {
+            switch (isSuccess) {
+                case "胜":
+                    ret = "负";
+                    break;
+                case "负":
+                    ret = "胜";
+                    break;
+                case "平":
+                    ret = "平";
+                    break;
+                default:
+                    ret = "无";
+                    break;
+            }
+        } else {
+            ret = "无";
+        }
+        return ret;
+    }
+
+
+    public String evaluate(String companyId, String yg_name, String bg_name, String ygLawyer, String bgLawyer) {
+        String ret = "";
+        if (yg_name != null && yg_name.contains(companyId)) {
+            ret = ygLawyer;
+        } else if (bg_name != null && bg_name.contains(companyId)) {
+            ret = bgLawyer;
+        } else {
+            ret = "";
+        }
+
+        return ret;
+    }
+
+    public static void main(String[] args) {
+        CompanyCase c = new CompanyCase();
+        System.out.println(c.evaluate("Y", "胜"));
+        System.out.println(c.evaluate("B", "胜"));
+        System.out.println(c.evaluate("B", null));
+    }
+
+}

+ 52 - 0
src/main/java/com/winhc/bigdata/udf/CompanyMonitorTrans.java

@@ -0,0 +1,52 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.fastjson.serializer.SerializerFeature;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+import com.winhc.bigdata.bean.EntityInfo;
+
+import java.util.*;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+
+/**
+ * @Author: π
+ * @Date: 2023/7/27 16:26
+ * ODPS UDF plus a static helper for reshaping JSON arrays of company entities.
+ * <p>
+ * Sample inputs noted by the original author:
+ * [{"company_id":"2c9d7aec5ac48c1f505b7cc74123b8f1","name":"西安市公路工程管理处"},{"company_id":"cec3be99b8395225c4c7519f2416f4ee","name":"正衡工程项目管理有限公司"}]
+ * 3297737385;3097206773
+ * [{"gid":"3097206773","name":"西安市公路工程管理处"}]
+ * 招标公告
+ */
+public class CompanyMonitorTrans extends UDF {
+
+    /**
+     * Sets {@code project_bid_money} to an empty string on every object of a JSON array.
+     *
+     * @param wtbamtInfo JSON array of objects, may be null or blank
+     * @return the re-serialized array, or {@code null} for blank input
+     */
+    public String evaluate(String wtbamtInfo) {
+        if (StringUtils.isBlank(wtbamtInfo)) {
+            return null;
+        }
+
+        List<JSONObject> items = JSON.parseArray(wtbamtInfo).toJavaList(JSONObject.class);
+        for (JSONObject item : items) {
+            item.put("project_bid_money", "");
+        }
+        return JSONObject.toJSONString(items);
+    }
+
+    /**
+     * Converts a JSON array of objects into {@link EntityInfo} instances.
+     *
+     * @param entity   JSON array text, may be null or blank
+     * @param id_key   key under which each object stores its id
+     * @param name_key key under which each object stores its name
+     * @param type     value passed unchanged as third constructor argument of every entity
+     * @return one entity per array element, or an empty list for blank input
+     */
+    public static final List<EntityInfo> getEntity(String entity, String id_key, String name_key, String type) {
+        if (org.apache.commons.lang3.StringUtils.isBlank(entity)) {
+            return Collections.emptyList();
+        }
+        List<EntityInfo> entities = new ArrayList<>();
+        for (JSONObject obj : JSON.parseArray(entity).toJavaList(JSONObject.class)) {
+            String id = obj.getString(id_key);
+            String name = obj.getString(name_key);
+            entities.add(new EntityInfo(id, name, type));
+        }
+        return entities;
+    }
+
+
+    /** Intentionally empty entry point. */
+    public static void main(String[] args) {
+    }
+}

+ 113 - 0
src/main/java/com/winhc/bigdata/udf/CompanyTmTrans.java

@@ -0,0 +1,113 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.fastjson.serializer.SerializerFeature;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import static com.winhc.bigdata.udf.GetCompanyChangeInfo.md5;
+
+/**
+ * @Author: π
+ * @Date: 2023/7/27 16:26
+ * ODPS UDF with three conversions: date normalisation, applicant JSON assembly and
+ * construction of an image URL on the {@code winhc-trademark} bucket.
+ */
+public class CompanyTmTrans extends UDF {
+
+    /** Chinese date notation "yyyy年M月d日", optionally followed by arbitrary text. */
+    private static final Pattern pattern1 = Pattern.compile("(\\d{4})年(\\d{1,2})月(\\d{1,2})日.*");
+
+    /**
+     * Normalises a date string to {@code yyyy-MM-dd}.
+     *
+     * @param date a date in "yyyy年M月d日…", "yyyy/MM/dd[ time]" or "yyyy-MM-dd[ time]" notation
+     * @return the normalised date, or {@code null} when the input is blank, in an unknown
+     * notation, or not a real calendar date
+     */
+    public String evaluate(String date) {
+        if (StringUtils.isBlank(date)) return null;
+        return extracted(date);
+
+    }
+
+    /** Rewrites {@code date} to dash notation and validates it; null when not convertible. */
+    private static String extracted(String date) {
+        String tmpDate = null;
+        if (date.contains("年")) {
+            Matcher m0 = pattern1.matcher(date);
+            if (m0.matches()) {
+                tmpDate = m0.group(1) + "-" + pad2(m0.group(2)) + "-" + pad2(m0.group(3));
+            }
+        } else if (date.contains("/")) {
+            tmpDate = date.split(" ")[0].replaceAll("/", "-");
+        } else if (date.contains("-")) {
+            tmpDate = date.split(" ")[0];
+        }
+        if (tmpDate == null) {
+            // Unknown notation: answer directly instead of provoking an exception.
+            return null;
+        }
+
+        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
+        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+        // Strict parsing: the parse is only a validity check, and lenient mode would
+        // accept impossible dates such as month 13 or day 45.
+        formatter.setLenient(false);
+        try {
+            formatter.parse(tmpDate);
+            return tmpDate;
+        } catch (java.text.ParseException e) {
+            return null;
+        }
+    }
+
+    /** Left-pads a one-digit month or day with a zero. */
+    private static String pad2(String digits) {
+        return digits.length() < 2 ? "0" + digits : digits;
+    }
+
+    /**
+     * Assembles up to two applicants into a JSON array.
+     *
+     * @param applicant_1       name of the first applicant; skipped when blank
+     * @param applicant_keyno_1 id of the first applicant, may be null
+     * @param applicant_2       name of the second applicant; skipped when blank
+     * @param applicant_keyno_2 id of the second applicant, may be null
+     * @return JSON array of {@code {"applicant_name","applicant_keyno"}} objects
+     */
+    public String evaluate(String applicant_1, String applicant_keyno_1, String applicant_2, String applicant_keyno_2) {
+        JSONArray jj = new JSONArray();
+        if (StringUtils.isNotBlank(applicant_1)) {
+            jj.add(new JSONObject()
+                    .fluentPut("applicant_name", applicant_1)
+                    .fluentPut("applicant_keyno", applicant_keyno_1)
+            );
+        }
+
+        if (StringUtils.isNotBlank(applicant_2)) {
+            jj.add(new JSONObject()
+                    .fluentPut("applicant_name", applicant_2)
+                    .fluentPut("applicant_keyno", applicant_keyno_2)
+            );
+        }
+
+        return JSON.toJSONString(jj);
+    }
+
+    /**
+     * Builds the image URL for a registration number and class.
+     * <p>
+     * The object name is {@code reg_no/int_cls.jpg}; characters 8..23 of its md5 string are
+     * split into two-character path segments that are placed in front of it.
+     * NOTE(review): assumes {@code md5} returns a 32-character hex digest — confirm in
+     * GetCompanyChangeInfo.
+     *
+     * @param reg_no  registration number
+     * @param int_cls class number
+     * @return the absolute URL on the winhc-trademark OSS bucket
+     */
+    public String evaluate(String reg_no, String int_cls) {
+        String image_name = reg_no + "/" + int_cls + ".jpg";
+        String filename_md5 = md5(image_name).substring(8, 24);
+        List<String> list = new ArrayList<>();
+        for (int i = 0; i < filename_md5.length(); i += 2) {
+            list.add(filename_md5.substring(i, i + 2));
+        }
+        list.add(image_name);
+        String re = String.join("/", list);
+        return "https://winhc-trademark.oss-cn-shanghai.aliyuncs.com/" + re;
+    }
+
+
+    /** Manual smoke run covering all overloads. */
+    public static void main(String[] args) {
+
+        CompanyTmTrans j = new CompanyTmTrans();
+        System.out.println(j.evaluate("2005-09-28 00:00:00"));
+        System.out.println(j.evaluate("2005年9月8日 00:00:00"));
+        System.out.println(j.evaluate("2005年09月18日"));
+        System.out.println(j.evaluate("2005/09/28 00:00:00"));
+        System.out.println(j.evaluate("2005-09-28"));
+        System.out.println(j.evaluate("35186311", "41"));
+        System.out.println(j.evaluate("a1", null, "a2", null));
+        System.out.println(j.evaluate("a1", "11", "a2", null));
+        System.out.println(j.evaluate("", null, "a2", "11"));
+        System.out.println(j.evaluate(null, null, null, "11"));
+
+    }
+}

+ 25 - 0
src/main/java/com/winhc/bigdata/udf/Content2Csv.java

@@ -0,0 +1,25 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class Content2Csv extends UDF {
+
+    public String evaluate(String content) {
+        if (StringUtils.isBlank(content)) return null;
+        return StringEscapeUtils.escapeCsv(content);
+    }
+
+
+    public static void main(String[] args) {
+        Content2Csv j = new Content2Csv();
+        System.out.println(j.evaluate("1997,Ford,E350,\"Super, luxurious truck\""));
+        System.out.println(j.evaluate("遂昌县金竹镇茶竹岭村股,份经济合作社"));
+    }
+
+}

Datei-Diff unterdrückt, da er zu groß ist
+ 36 - 0
src/main/java/com/winhc/bigdata/udf/ContentParseError.java


+ 64 - 0
src/main/java/com/winhc/bigdata/udf/CourtName2Province.java

@@ -0,0 +1,64 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+
+
+/**
+ * @Author: π
+ * @Description: ODPS UDF that maps court names to the codes listed in the classpath
+ * resource {@code all_court.txt} (the original description calls this "court to province").
+ */
+public class CourtName2Province extends UDF {
+
+    /** Court name to code, filled once from {@code all_court.txt} when the class is loaded. */
+    private static final Map<String, String> map = new HashMap<>();
+
+    static {
+        InputStream in = CourtName2Province.class.getClassLoader().getResourceAsStream("all_court.txt");
+        if (in == null) {
+            throw new IllegalStateException("classpath resource all_court.txt not found");
+        }
+        // NOTE(review): the resource is decoded as UTF-8 rather than with the platform
+        // default charset — confirm that all_court.txt is stored as UTF-8.
+        try (BufferedReader br = new BufferedReader(
+                new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = br.readLine()) != null) {
+                // Expected layout: "<court>,<code>[,...]"; lines without a comma are skipped.
+                String[] arr = line.split(",", -1);
+                if (arr.length < 2) {
+                    continue;
+                }
+                String court = arr[0];
+                String code = arr[1];
+                if (StringUtils.isNotBlank(court) && StringUtils.isNotBlank(code)) {
+                    map.put(court, code);
+                }
+            }
+        } catch (IOException e) {
+            // Best effort as before: keep what was read so far, but stop reading.
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Maps a comma-separated list of court names to their distinct codes.
+     *
+     * @param courtName court names separated by ASCII commas, may be null or blank
+     * @return a JSON array with the distinct codes of all known courts in input order
+     * (unknown names are dropped), or {@code null} when the input is blank
+     */
+    public String evaluate(String courtName) {
+        if (StringUtils.isBlank(courtName)) {
+            return null;
+        }
+        List<String> re = Arrays.stream(courtName.split(","))
+                .filter(StringUtils::isNotBlank)
+                .distinct()
+                .map(x -> map.getOrDefault(x, ""))
+                .filter(StringUtils::isNotBlank)
+                .distinct().collect(Collectors.toList());
+        return JSON.toJSONString(re);
+
+    }
+
+    /** Manual smoke run with a list of sample court names. */
+    public static void main(String[] args) {
+        CourtName2Province res = new CourtName2Province();
+        System.out.println(res.evaluate("北京市丰台区人民法院,北京市高级人民法院,北京市第二中级人民法院,江苏省无锡市中级人民法院,无锡市惠山区人民法院,江苏省无锡市惠山区人民法院"));
+    }
+}

+ 33 - 0
src/main/java/com/winhc/bigdata/udf/CsvTrans.java

@@ -0,0 +1,33 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class CsvTrans extends UDF {
+
+
+    public String evaluate(String code) {
+        if (StringUtils.isBlank(code)) {
+            return "";
+        }
+        return StringEscapeUtils
+                .escapeCsv(code);
+    }
+
+    public static void main(String[] args) {
+        CsvTrans j = new CsvTrans();
+        System.out.println(j.evaluate("I said \"Hey, I am 5'10\".\"\n.,,你好"));
+        System.out.println(j.evaluate("111,222,333"));
+        System.out.println(j.evaluate(""));
+
+    }
+
+}

Datei-Diff unterdrückt, da er zu groß ist
+ 52 - 0
src/main/java/com/winhc/bigdata/udf/DataRemoveKeys.java


+ 55 - 0
src/main/java/com/winhc/bigdata/udf/GetCapitalActualAmt.java

@@ -0,0 +1,55 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+import java.math.BigDecimal;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * ODPS UDF that adds up the {@code amomon} values of a JSON array of records.
+ *
+ * <p>Example input: {@code [{"amomon":"20万元人民币","paymet":"其它","time":""}]}.
+ * Only digits and dots of each {@code amomon} string are kept; the unit text is dropped,
+ * not converted.
+ *
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ */
+public class GetCapitalActualAmt extends UDF {
+
+    /** Matches every character that is neither an ASCII digit nor a dot. */
+    private static final Pattern pattern_number = Pattern.compile("[^0-9.]");
+
+    /**
+     * @param info JSON array text; may be {@code null}
+     * @return the sum of all parsable {@code amomon} values, or {@code null} when {@code info}
+     *         is blank or no record yields a number
+     */
+    public Double evaluate(String info) {
+        if (StringUtils.isBlank(info)) {
+            return null;
+        }
+
+        BigDecimal total = null;
+        List<JSONObject> records = JSON.parseArray(info).toJavaList(JSONObject.class);
+        for (JSONObject record : records) {
+            BigDecimal amount = cleanupNotNumber(record.getString("amomon"));
+            if (amount == null) {
+                continue;
+            }
+            if (total == null) {
+                total = amount;
+            } else {
+                total = total.add(amount);
+            }
+        }
+
+        if (total == null) {
+            return null;
+        }
+        return total.doubleValue();
+    }
+
+    /**
+     * Strips everything except digits and dots from {@code s} and parses the remainder.
+     *
+     * @param s text containing a number, e.g. {@code "60.0000万元"}
+     * @return the parsed value without trailing zeros, or {@code null} when {@code s} is blank
+     *         or the remaining characters do not form a number
+     */
+    public static BigDecimal cleanupNotNumber(String s) {
+        if (StringUtils.isBlank(s)) {
+            return null;
+        }
+        String digitsOnly = pattern_number.matcher(s).replaceAll("");
+        try {
+            BigDecimal parsed = new BigDecimal(digitsOnly);
+            return parsed.stripTrailingZeros();
+        } catch (NumberFormatException notANumber) {
+            // e.g. empty remainder or more than one dot
+            return null;
+        }
+    }
+
+
+    public static void main(String[] args) {
+        String info = "[{\"amomon\":\"100.250100万元\",\"paymet\":\"货币\",\"time\":\"2011-12-14\"},{\"amomon\":\"60.0000万元\",\"paymet\":\"货币\",\"time\":\"2012-02-08\"}]";
+        info = "[{\"amomon\":\"1.481万人民币\",\"paymet\":\"货币\",\"time\":\"2011-01-26\"},{\"amomon\":\"6.7万人民币\",\"paymet\":\"实物\",\"time\":\"2011-01-26\"}]";
+        GetCapitalActualAmt udf = new GetCapitalActualAmt();
+        System.out.println(udf.evaluate(info));
+    }
+}

+ 52 - 0
src/main/java/com/winhc/bigdata/udf/GetCaseAmt.java

@@ -0,0 +1,52 @@
+package com.winhc.bigdata.udf;
+
+import cn.hutool.core.util.StrUtil;
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.Comparator;
+import java.util.Optional;
+
+/**
+ * ODPS UDF that returns the {@code amt} of the highest-ranked record in a JSON array of
+ * {@code {"case_stage": ..., "amt": ...}} objects. Ranking is defined by
+ * {@link #stageWeight(String)}.
+ */
+public class GetCaseAmt extends UDF {
+
+    /**
+     * @param case_info JSON array text; may be {@code null} or blank
+     * @return the {@code amt} of the record whose {@code case_stage} has the highest weight
+     *         (the first such record wins on ties), or {@code null} when the input is blank,
+     *         contains no records, the chosen record has no {@code amt}, or the amount is
+     *         not greater than zero
+     */
+    public Double evaluate(String case_info) {
+        // parseArray returns null for blank / "null" text, which previously caused an NPE.
+        if (StringUtils.isBlank(case_info)) {
+            return null;
+        }
+
+        Double amt = Optional.ofNullable(JSON.parseArray(case_info))
+                .flatMap(records -> records.toJavaList(JSONObject.class)
+                        .stream()
+                        .filter(r -> r != null)
+                        // max() keeps the first of equally weighted records, exactly like the
+                        // former stable "sort descending, take first".
+                        .max(Comparator.comparing((JSONObject r) -> stageWeight(r.getString("case_stage")))))
+                // getDouble yields null when "amt" is absent; keep it boxed to avoid unboxing null.
+                .map(top -> top.getDouble("amt"))
+                .orElse(null);
+
+        if (amt == null || amt <= 0) {
+            return null;
+        }
+        return amt;
+    }
+
+    /**
+     * Ranks a stage name by the first matching marker character.
+     *
+     * <p>Presumably the markers stand for enforcement, retrial, second instance and first
+     * instance respectively - confirm with the data owner.
+     *
+     * @param stage_name stage description, e.g. {@code "民事二审"}
+     * @return 0 for a blank name; 5, 4, 3 or 2 when the name contains "执", "再", "二" or "一"
+     *         (checked in that order); 1 otherwise
+     */
+    public Integer stageWeight(String stage_name) {
+        if (StringUtils.isBlank(stage_name)) return 0;
+        if (StrUtil.containsAny(stage_name, "执")) {
+            return 5;
+        }
+        if (StrUtil.containsAny(stage_name, "再")) {
+            return 4;
+        }
+        if (StrUtil.containsAny(stage_name, "二")) {
+            return 3;
+        }
+        if (StrUtil.containsAny(stage_name, "一")) {
+            return 2;
+        }
+        return 1;
+    }
+
+    public static void main(String[] args) {
+        GetCaseAmt c = new GetCaseAmt();
+        System.out.println(c.evaluate("[{\"case_stage\":\"民事一审\",\"amt\":7.645169},{\"case_stage\":\"民事二审\",\"amt\":7.645169}]"));
+    }
+
+}

Datei-Diff unterdrückt, da er zu groß ist
+ 101 - 0
src/main/java/com/winhc/bigdata/udf/GetCompanyChangeInfo.java


Datei-Diff unterdrückt, da er zu groß ist
+ 129 - 0
src/main/java/com/winhc/bigdata/udf/GetCompanyPatentNewInfo.java


+ 95 - 0
src/main/java/com/winhc/bigdata/udf/GetCompletionAcceptanceInfo.java

@@ -0,0 +1,95 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import java.text.SimpleDateFormat;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static com.winhc.bigdata.utils.CompanyUtils.cleanup;
+
+/**
+ * ODPS UDF that flattens the {@code records} array of a JSON document into one JSON string
+ * per record, renaming the source fields to lower-case column names and adding a
+ * {@code rowkey}, timestamps and a {@code deleted} flag. (The original class comment only
+ * says "construction dimension".)
+ *
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ */
+public class GetCompletionAcceptanceInfo extends UDF {
+
+    public static final DateTimeFormatter YYYY_MM_DDHHMMSS = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+
+
+    /**
+     * Formats epoch milliseconds as {@code yyyy-MM-dd HH:mm:ss} in the JVM default time zone.
+     *
+     * @param tt epoch milliseconds; may be {@code null}
+     * @return the formatted time, or {@code null} when {@code tt} is {@code null}
+     */
+    public static String getDateString(Long tt) {
+        if (null == tt) return null;
+        Date d = new Date(tt);
+        // A fresh formatter per call: SimpleDateFormat must not be shared between threads.
+        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(d);
+    }
+
+    /**
+     * @param pattern formatter to use; {@link DateTimeFormatter#ISO_DATE} when {@code null}
+     * @return the current local date-time rendered with {@code pattern}
+     */
+    public static String nowDate(DateTimeFormatter pattern) {
+        if (pattern == null) {
+            pattern = DateTimeFormatter.ISO_DATE;
+        }
+        return LocalDateTime.now().format(pattern);
+    }
+
+
+    /**
+     * @param info JSON object text with an optional {@code records} array
+     * @return one JSON string per record that has a non-blank {@code prjNum}; an empty list
+     *         when there are no such records; {@code null} when {@code info} is blank
+     */
+    public List<String> evaluate(String info) {
+        if (StringUtils.isBlank(info)) return null;
+        // One timestamp per call, so all records of the same input share create/update time.
+        final String nowTime = nowDate(YYYY_MM_DDHHMMSS);
+        // parseObject returns null for the text "null"; treat that like a missing records array.
+        return Optional.ofNullable(JSONObject.parseObject(info))
+                .map(root -> root.getJSONArray("records"))
+                .orElseGet(JSONArray::new)
+                .toJavaList(JSONObject.class).stream()
+                .filter(x -> StringUtils.isNotBlank(x.getString("prjNum")))
+                .map(d -> toRecordJson(d, nowTime))
+                .collect(Collectors.toList());
+    }
+
+    /** Builds the output JSON string for one source record. */
+    private static String toRecordJson(JSONObject d, String nowTime) {
+        String prjnum = d.getString("prjNum");
+        String prjfinishnum = d.getString("prjFinishNum");
+        // rowkey = prjNum + "_" + md5(cleaned prjFinishNum); a null prjFinishNum hashes as "".
+        String rowkey = concatws("_", prjnum, md5(cleanup(concatws("", prjfinishnum))));
+        JSONObject j = new JSONObject()
+                .fluentPut("rowkey", rowkey)
+                .fluentPut("provinceprjfinishnum", d.getString("provincePrjFinishNum"))
+                .fluentPut("prjfinishnum", prjfinishnum)
+                .fluentPut("builderlicencenum", d.getString("builderLicenceNum"))
+                .fluentPut("factcost", d.getString("factCost"))
+                .fluentPut("prjnum", prjnum)
+                .fluentPut("factarea", d.getString("factArea"))
+                .fluentPut("length", d.getString("length"))
+                .fluentPut("span", d.getString("span"))
+                .fluentPut("factsize", d.getString("factSize"))
+                .fluentPut("bdate", atMidnight(d.getString("bDate")))
+                .fluentPut("structural_system", null)
+                .fluentPut("edate", atMidnight(d.getString("eDate")))
+                .fluentPut("mark", d.getString("mark"))
+                .fluentPut("create_time", nowTime)
+                .fluentPut("update_time", nowTime)
+                .fluentPut("deleted", 0);
+        return JSONObject.toJSONString(j);
+    }
+
+    /**
+     * Appends {@code " 00:00:00"} to a date string. A blank date yields {@code null} so the
+     * output never contains the literal text {@code "null 00:00:00"}.
+     */
+    private static String atMidnight(String date) {
+        return StringUtils.isBlank(date) ? null : date + " 00:00:00";
+    }
+
+
+    /** Joins the non-null {@code strings} with {@code seq} as separator. */
+    public static String concatws(String seq, String... strings) {
+        return Stream.of(strings).filter(Objects::nonNull).collect(Collectors.joining(seq));
+    }
+
+    /** @return the lower-case hexadecimal MD5 digest of {@code s} */
+    public static String md5(String s) {
+        return DigestUtils.md5Hex(s);
+    }
+
+
+    public static void main(String[] args) {
+        String info = "{\"records\":[{\"bDate\":\"2017-11-03\",\"cREATEDATE\":1547811476000,\"dataLevel\":\"D\",\"eDate\":\"2018-06-01\",\"factArea\":\"0\",\"factCost\":2278.82,\"factSize\":\"/\",\"prjFinishNum\":\"4290051901220016-JX-001\",\"prjNum\":\"4290051901220016\",\"prjStructureTypeNum\":\"099\",\"provincePrjFinishNum\":\"4290051709280213-JX-004\"}],\"yhc_time\":\"2023-03-20\"}";
+        GetCompletionAcceptanceInfo j = new GetCompletionAcceptanceInfo();
+        System.out.println(j.evaluate(info));
+    }
+}

Datei-Diff unterdrückt, da er zu groß ist
+ 100 - 0
src/main/java/com/winhc/bigdata/udf/GetConstructionBidInfo.java


Datei-Diff unterdrückt, da er zu groß ist
+ 98 - 0
src/main/java/com/winhc/bigdata/udf/GetConstructionContractInfo.java


Datei-Diff unterdrückt, da er zu groß ist
+ 112 - 0
src/main/java/com/winhc/bigdata/udf/GetConstructionPermitInfo.java


Datei-Diff unterdrückt, da er zu groß ist
+ 127 - 0
src/main/java/com/winhc/bigdata/udf/GetConstructionProjectDetailInfo.java


Datei-Diff unterdrückt, da er zu groß ist
+ 48 - 0
src/main/java/com/winhc/bigdata/udf/GetHolderInfo.java


Datei-Diff unterdrückt, da er zu groß ist
+ 79 - 0
src/main/java/com/winhc/bigdata/udf/GetParticipatingUnitsInfo.java


+ 101 - 0
src/main/java/com/winhc/bigdata/udf/GetWorkingDrawingInfo.java

@@ -0,0 +1,101 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import java.text.SimpleDateFormat;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static com.winhc.bigdata.utils.CompanyUtils.cleanup;
+
+/**
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ * @Description: 建筑维度
+ */
+public class GetWorkingDrawingInfo extends UDF {
+
+    public static final DateTimeFormatter YYYY_MM_DDHHMMSS = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+
+
+    public static String getDateString(Long tt) {
+        if (null == tt) return null;
+        Date d = new Date(tt);
+        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(d);
+    }
+
+    public static String nowDate(DateTimeFormatter pattern) {
+        if (pattern == null) {
+            pattern = DateTimeFormatter.ISO_DATE;
+        }
+        return LocalDateTime.now().format(pattern);
+    }
+
+
+    public List<String> evaluate(String info) {
+        if (StringUtils.isBlank(info)) return null;
+        return Optional.ofNullable(JSONObject.parseObject(info).getJSONArray("records"))
+                .orElse(new JSONArray())
+                .toJavaList(JSONObject.class).stream()
+                .filter(x -> StringUtils.isNotBlank(x.getString("prjNum")))
+                .map(d -> {
+                    JSONObject j = new JSONObject();
+                    String nowTime = nowDate(YYYY_MM_DDHHMMSS);
+                    String prjnum = d.getString("prjNum");
+                    String censornum = d.getString("censorNum");
+                    String rowkey = concatws("_", prjnum, md5(cleanup(concatws("", censornum))));
+                    j.fluentPut("rowkey", rowkey)
+                            .fluentPut("censornum", d.getString("censorNum"))
+                            .fluentPut("prjnum", d.getString("prjNum"))
+                            .fluentPut("provincecensornum", d.getString("provinceCensorNum"))
+                            .fluentPut("censorcorpname_credit_code", d.getString("censorCorpCode"))
+                            .fluentPut("censorcorpname", d.getString("censorCorpName"))
+                            .fluentPut("onecensorispass", d.getString("oneCensorIsPass"))
+                            .fluentPut("censoredate", d.getString("censorEDate") + " 00:00:00")
+                            .fluentPut("createdate", d.getString("createDate") + " 00:00:00")
+                            .fluentPut("one_review_number", d.getString("oneCensorWfqtCount"))
+                            .fluentPut("onecensorwfqtcontent", d.getString("oneCensorWfqtContent"))
+                            .fluentPut("is_joint_review", d.getString("isLS"))
+                            .fluentPut("fire_fighting_time", d.getString("xFCensorEDate"))
+                            .fluentPut("fire_fighting_number", d.getString("xFCensorNum"))
+                            .fluentPut("people_fighting_time", d.getString("rFCensorEDate"))
+                            .fluentPut("people_fighting_number", d.getString("rFCensorNum"))
+                            .fluentPut("people_fighting_institution", d.getString("rFCensorCorpName"))
+                            .fluentPut("fire_fighting_institution", d.getString("xFCensorCorpName"))
+                            .fluentPut("prjsize", d.getString("prjSize"))
+                            .fluentPut("datalevel", d.getString("dataLevel"))
+                            .fluentPut("create_time", nowTime)
+                            .fluentPut("update_time", nowTime)
+                            .fluentPut("deleted", 0)
+                    ;
+                    return JSONObject.toJSONString(j);
+                })
+                .collect(Collectors.toList());
+    }
+
+
+    public static String concatws(String seq, String... strings) {
+        return Stream.of(strings).filter(Objects::nonNull).collect(Collectors.joining(seq));
+    }
+
+    public static String md5(String s) {
+        return DigestUtils.md5Hex(s);
+    }
+
+
+    public static void main(String[] args) {
+        String info = "{\"records\":[{\"censorCorpCode\":\"79962762-0\",\"censorCorpName\":\"温州新正施工图审查咨询有限公司\",\"censorEDate\":\"2013-11-19\",\"censorNum\":\"3303001307010101-TX-001\",\"createDate\":\"2013-07-01\",\"dataLevel\":\"D\",\"oneCensorIsPass\":0,\"prjNum\":\"3303001307010101\",\"provinceCensorNum\":\"WKFD2013138\"}],\"yhc_time\":\"2023-03-24\"}";
+        GetWorkingDrawingInfo j = new GetWorkingDrawingInfo();
+        System.out.println(j.evaluate(info));
+    }
+}

+ 38 - 0
src/main/java/com/winhc/bigdata/udf/HistoryNameSplit.java

@@ -0,0 +1,38 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * @author: π
+ * 2020/12/16 16:06
+ */
+public class HistoryNameSplit extends UDF {
+
+    public List<String> evaluate(String names) {
+        Set<String> set = new HashSet<>();
+
+        if (org.apache.commons.lang3.StringUtils.isNotBlank(names)) {
+            set.addAll(Arrays.stream(names.split("\t;\t"))
+                    .filter(org.apache.commons.lang3.StringUtils::isNotBlank).collect(Collectors.toSet()));
+        }
+        return set.stream()
+                .filter(org.apache.commons.lang3.StringUtils::isNotBlank)
+                .map(x -> x.replaceAll("\t;", "")
+                        .replaceAll("[(]", "(")
+                        .replaceAll("[)]", ")")
+                        .trim()).distinct()
+                .collect(Collectors.toList());
+    }
+
+    public static void main(String[] args) {
+        String historyName = "海美電子有限公司	;	陕西十冶投资开发有限公司";
+        List<String> evaluate = new HistoryNameSplit().evaluate(historyName);
+        System.out.println(evaluate);
+    }
+}

+ 20 - 0
src/main/java/com/winhc/bigdata/udf/Json2Str.java

@@ -0,0 +1,20 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * ODPS UDF that joins the elements of a JSON array into one delimited string.
+ *
+ * @author: XuJiakai
+ * 2020/12/16 16:06
+ */
+public class Json2Str extends UDF {
+
+    /**
+     * @param json   JSON array text; may be {@code null}
+     * @param splter delimiter placed between the elements
+     * @return the array elements (converted to strings) joined with {@code splter}, or
+     *         {@code null} when {@code json} is blank
+     */
+    public String evaluate(String json, String splter) {
+        if (StringUtils.isBlank(json)) {
+            return null;
+        }
+        Iterable<String> elements = JSON.parseArray(json).toJavaList(String.class);
+        return String.join(splter, elements);
+    }
+}

Datei-Diff unterdrückt, da er zu groß ist
+ 54 - 0
src/main/java/com/winhc/bigdata/udf/Name2Lawyer.java


+ 32 - 0
src/main/java/com/winhc/bigdata/udf/NameCleanupV2.java

@@ -0,0 +1,32 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ * @Description: 企业名称去重
+ */
+public class NameCleanupV2 extends UDF {
+    private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa5a-zA-Z()() ·,]");
+
+    public String evaluate(String val) {
+        if(StringUtils.isBlank(val)) return "";
+        if(val.contains("某")) return "";
+        return pattern.matcher(val).replaceAll("");
+    }
+
+    public static void main(String[] args) {
+        NameCleanupV2 v = new NameCleanupV2();
+        System.out.println(v.evaluate("ABB(中国)有限公司"));
+        System.out.println(v.evaluate("46陈玉玲"));
+        System.out.println(v.evaluate("张某"));
+        System.out.println(v.evaluate("TCL—罗格朗国际电工(惠州)有限公司"));
+        System.out.println(v.evaluate("DAC中国特别机遇(巴巴多斯)有限公司[DACCHINASOS(BARBADOS)SRL]"));
+    }
+}
+
+

+ 40 - 0
src/main/java/com/winhc/bigdata/udf/RelationSplit.java

@@ -0,0 +1,40 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Date: 2023/1/30 16:26
+ * @Description: 关系切分
+ */
+public class RelationSplit extends UDF {
+
+    public static final Pattern pp1 = Pattern.compile("^\\((.+?)\\)<-\\[(.+?)]-\\((.+?)\\)$");
+    public static final Pattern pp2 = Pattern.compile("^\\((.+?)\\)<--\\((.+?)\\)$");
+
+    public List<String> evaluate(String relation) {
+        if (StringUtils.isBlank(relation)) return null;
+        Matcher matcher = pp1.matcher(relation);
+        if (matcher.matches()) {
+            return Arrays.asList(matcher.group(1), matcher.group(2), matcher.group(3), "1");
+        }
+        Matcher matcher2 = pp2.matcher(relation);
+        if (matcher2.matches()) {
+            return Arrays.asList(matcher2.group(1), "", matcher2.group(2), "2");
+        }
+        return null;
+    }
+
+    public static void main(String[] args) {
+        RelationSplit j = new RelationSplit();
+        System.out.println(j.evaluate("(d6b2b7cab5678bead810d637134b3656)<-[0.50776]-(123)"));
+        System.out.println(j.evaluate("(f943dcc2c5c80500d1e18bc2f9d36d07)<--(p06098c8f51c3774c234895d952a44a96)"));
+    }
+}

+ 72 - 0
src/main/java/com/winhc/bigdata/udf/ToProvinceCode.java

@@ -0,0 +1,72 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.bean.Category;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+
+/**
+ * ODPS UDF that maps a two-digit numeric code to a short letter abbreviation
+ * (for example {@code "32"} to {@code "JS"}).
+ * Presumably the keys are province-level administrative codes - confirm with the data owner.
+ *
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class ToProvinceCode extends UDF {
+
+    /** Numeric code to letter abbreviation. */
+    public static final Map<String, String> provinceMap = new HashMap<String, String>();
+
+    static {
+        provinceMap.put("62", "GS");
+        provinceMap.put("44", "GD");
+        provinceMap.put("45", "GX");
+        provinceMap.put("52", "GZ");
+        provinceMap.put("46", "HAIN");
+        provinceMap.put("13", "HB");
+        provinceMap.put("23", "HLJ");
+        provinceMap.put("41", "HEN");
+        provinceMap.put("42", "HUB");
+        provinceMap.put("43", "HUN");
+        provinceMap.put("32", "JS");
+        provinceMap.put("36", "JX");
+        provinceMap.put("22", "JL");
+        provinceMap.put("21", "LN");
+        provinceMap.put("15", "NMG");
+        provinceMap.put("64", "NX");
+        provinceMap.put("63", "QH");
+        provinceMap.put("37", "SD");
+        provinceMap.put("31", "SH");
+        provinceMap.put("14", "SX");
+        provinceMap.put("61", "SAX");
+        provinceMap.put("51", "SC");
+        provinceMap.put("12", "TJ");
+        provinceMap.put("65", "XJ");
+        provinceMap.put("54", "XZ");
+        provinceMap.put("53", "YN");
+        provinceMap.put("33", "ZJ");
+        provinceMap.put("10", "CN");
+        provinceMap.put("34", "AH");
+        provinceMap.put("11", "BJ");
+        provinceMap.put("50", "CQ");
+        provinceMap.put("35", "FJ");
+    }
+
+    /**
+     * @param code numeric code; may be {@code null}
+     * @return the abbreviation registered for {@code code}, or an empty string when
+     *         {@code code} is blank or unknown
+     */
+    public String evaluate(String code) {
+        String abbreviation = StringUtils.isBlank(code) ? null : provinceMap.get(code);
+        return abbreviation == null ? "" : abbreviation;
+    }
+
+    public static void main(String[] args) {
+        ToProvinceCode udf = new ToProvinceCode();
+        for (String sample : new String[]{"35", "32", "88", ""}) {
+            System.out.println(udf.evaluate(sample));
+        }
+    }
+
+}

+ 75 - 0
src/main/java/com/winhc/bigdata/udf/UrlMatch.java

@@ -0,0 +1,75 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * ODPS UDF that normalises a candidate URL and returns it only when it looks like a URL.
+ *
+ * @Author: π
+ * @Date: 2023/8/18
+ */
+public class UrlMatch extends UDF {
+
+    /**
+     * Optional {@code http://} or {@code https://} prefix, then a word character, then
+     * non-whitespace text that contains at least one dot followed by more text.
+     */
+    public static final Pattern url_pattern = Pattern.compile("^((http:\\/\\/)|(https:\\/\\/))?\\w+[^\\s]+(\\.[^\\s]+){1,}$");
+
+    /**
+     * @param url raw text; may be {@code null}
+     * @return {@code url} with full-width characters converted and all whitespace removed,
+     *         or {@code null} when the input is blank, contains {@code "@"}, contains
+     *         {@code "http"} anywhere but at the start, or does not match {@link #url_pattern}
+     */
+    public String evaluate(String url) {
+        if (StringUtils.isBlank(url)) {
+            return null;
+        }
+        String compact = toDBC(url).replaceAll("\\s+", "");
+        if (StringUtils.isBlank(compact) || compact.contains("@")) {
+            return null;
+        }
+        // "http" is only tolerated as the very first characters of the text.
+        if (compact.lastIndexOf("http") > 0) {
+            return null;
+        }
+        return url_pattern.matcher(compact).matches() ? compact : null;
+    }
+
+    /**
+     * Converts full-width characters to their half-width counterparts: U+3000 becomes a
+     * space and every character strictly between U+FF00 and U+FF5F is shifted down by 65248.
+     *
+     * @param str text to convert; may be {@code null}
+     * @return the converted text, or {@code null} when {@code str} is {@code null}
+     */
+    public static String toDBC(String str) {
+        if (str == null) {
+            return null;
+        }
+        StringBuilder converted = new StringBuilder(str.length());
+        for (char ch : str.toCharArray()) {
+            if (ch == '\u3000') {
+                converted.append(' ');
+            } else if (ch > '\uFF00' && ch < '\uFF5F') {
+                converted.append((char) (ch - 65248));
+            } else {
+                converted.append(ch);
+            }
+        }
+        return converted.toString();
+    }
+
+    public static void main(String[] args) {
+        UrlMatch res = new UrlMatch();
+        String[] samples = {
+                "https://dongmuwangchao.jiyoujia.com/shop/view_shop.htm?@spm=2013.1.w5002-23970198807.2.662c1de7PHwFzD",
+                "https://meisidanli.tmall.com/shop/view_shop.htm?spm=a1z0k.7386009.1997989141.2.239b37de1RmRzi&amp;shop_id=514047593",
+                "2.38 kpd:/ 复制打开抖音,看看【一号泉漂浮温泉酒店的作品】网红打卡一号泉漂浮温泉酒店(6)# 一号泉漂浮温泉... https://v.douyin.com/BbVNb7J/",
+                " http://www.lsgcjs.com",
+                "http://www.ksplmd.com/",
+                "3085421688",
+                "https://hxcfzp0828.1688.com/",
+                "https://shop1393895117009.1688.com/",
+                "tianyiwenju.1688.com",
+                "http://www.lsgcjs.com",
+                "www.dyhz lc.com"
+        };
+        for (String sample : samples) {
+            System.out.println(res.evaluate(sample));
+        }
+    }
+}

+ 84 - 0
src/main/java/com/winhc/bigdata/udf/finance_info_update.java

@@ -0,0 +1,84 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.apache.commons.lang.StringUtils.contains;
+import static org.apache.commons.lang.StringUtils.endsWith;
+
+/**
+ * ODPS UDF that scales down financial figures which look as if they were recorded in the
+ * wrong unit.
+ *
+ * <p>Field meaning (translated from the original comment):
+ * vendinc - total operating revenue;
+ * maibusinc - main business income;
+ * assgro - total assets;
+ * progro - total profit;
+ * netinc - net profit;
+ * ratgro - total tax paid.
+ *
+ * @author: π
+ * 2023/3/13 16:57
+ */
+public class finance_info_update extends UDF {
+
+    /**
+     * @return the six figures in the order vendinc, maibusinc, assgro, progro, netinc, ratgro.
+     *         They are returned unchanged unless revenue is above {@code 100 * 10000}, is at
+     *         least 1000 times {@code reg_capital_amount / 1000000}, and the company is not a
+     *         branch; in that case revenue is divided by 10000 and each other figure is divided
+     *         by 10000 when it fails its plausibility check against the scaled revenue
+     */
+    public List<Double> evaluate(String company_name, Double vendinc, Double maibusinc, Double assgro, Double progro
+            , Double netinc, Double ratgro, Double reg_capital_amount, String company_org_type) {
+
+        // revenue / registered-capital multiple that marks revenue as implausible
+        final int ve_times = 1000;
+        // divisor used when a figure is scaled down
+        final int reduce_times = 10000;
+
+        // a branch is recognised by its organisation type or by its name suffix
+        boolean isBranch = contains(company_org_type, "分") || endsWith(company_name, "分公司");
+        // original note: revenue above 10 billion and at least 1000x the registered capital
+        boolean revenueImplausible = vendinc != null && reg_capital_amount != null
+                && vendinc > 100 * 10000
+                && vendinc / (reg_capital_amount / 1000000) >= ve_times;
+
+        if (!revenueImplausible || isBranch) {
+            return Arrays.asList(vendinc, maibusinc, assgro, progro, netinc, ratgro);
+        }
+
+        Double scaledRevenue = vendinc / reduce_times;
+
+        // revenue must not be smaller than main business income
+        Double newMaibusinc = shrinkIf(maibusinc != null && scaledRevenue < maibusinc,
+                maibusinc, reduce_times);
+        // total assets more than 1000x revenue, or above 10 * 10000
+        Double newAssgro = shrinkIf(assgro != null && (assgro / scaledRevenue > 1000 || assgro > 10 * 10000),
+                assgro, reduce_times);
+        // total profit / net profit: larger than revenue, or magnitude more than 1000x revenue
+        Double newProgro = shrinkIf(progro != null && (scaledRevenue < progro || Math.abs(progro) / scaledRevenue > 1000),
+                progro, reduce_times);
+        Double newNetinc = shrinkIf(netinc != null && (scaledRevenue < netinc || Math.abs(netinc) / scaledRevenue > 1000),
+                netinc, reduce_times);
+        // total tax larger than revenue, or above 10000
+        Double newRatgro = shrinkIf(ratgro != null && (scaledRevenue < ratgro || ratgro > 10000),
+                ratgro, reduce_times);
+
+        return Arrays.asList(scaledRevenue, newMaibusinc, newAssgro, newProgro, newNetinc, newRatgro);
+    }
+
+    /** Returns {@code value / divisor} when {@code shrink} is true, otherwise {@code value} as is. */
+    private static Double shrinkIf(boolean shrink, Double value, int divisor) {
+        if (shrink) {
+            return value / divisor;
+        }
+        return value;
+    }
+
+
+    public static void main(String[] args) {
+        finance_info_update udf = new finance_info_update();
+        String company_name = "嘉峪关市汇鹏昌隆商贸有限责任公司";
+        String company_org_type = "有限责任公司(自然人独资)";
+        Double vendinc = 1192934.52;
+        Double maibusinc = 119.293452;
+        Double assgro = 160.75;
+        Double progro = 1.9;
+        Double netinc = 1.81;
+        Double ratgro = 1.99;
+        Double reg_capital_amount = 200000000D;
+        System.out.println(udf.evaluate(company_name, vendinc, maibusinc, assgro, progro, netinc, ratgro, reg_capital_amount, company_org_type));
+    }
+
+}

+ 43 - 0
src/main/java/com/winhc/bigdata/udf/get_content.java

@@ -0,0 +1,43 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class get_content extends UDF {
+
+    public List<String> evaluate(String json) {
+        if (StringUtils.isBlank(json) || "[]".equalsIgnoreCase(json)) return null;
+        try {
+            List<JSONObject> rr = JSON.parseArray(json)
+                    .toJavaList(JSONObject.class);
+            return rr.stream().map(j1 -> {
+                String flag = j1.getString("flag");
+                String detail_id = j1.getString("detail_id");
+                return flag + "@@" + detail_id;
+            }).distinct().collect(Collectors.toList());
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        }
+
+    }
+
+    public static void main(String[] args) {
+        get_content j = new get_content();
+        String json = "[{\"flag\":\"0\",\"date\":\"2016-11-18 00:00:00\",\"detail_id\":\"c987a6b2618bff7111b6c49f538b124c\",\"doc_type\":\"裁定书\"}]";
+        List<String> evaluate = j.evaluate(json);
+        System.out.println(evaluate);
+    }
+
+}

+ 51 - 0
src/main/java/com/winhc/bigdata/udf/get_content_update.java

@@ -0,0 +1,51 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * ODPS UDF that copies the {@code deleted} value from a second JSON array onto the matching
+ * objects of a first JSON array. Objects match when {@code detail_id} and {@code flag} agree.
+ *
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class get_content_update extends UDF {
+
+    /**
+     * @param json         JSON array of objects to update; may be {@code null}
+     * @param deleted_json JSON array of objects carrying {@code detail_id}, {@code flag} and
+     *                     {@code deleted}
+     * @return the updated, de-duplicated array as JSON text, or {@code null} when {@code json}
+     *         is blank, equals {@code "[]"}, or either input cannot be processed (the error is
+     *         printed to stderr)
+     */
+    public String evaluate(String json, String deleted_json) {
+        if (StringUtils.isBlank(json) || "[]".equalsIgnoreCase(json)) {
+            return null;
+        }
+        try {
+            // Index the deletion records by detail_id + flag; the first record wins on duplicates.
+            Map<String, JSONObject> deletedByKey = JSON.parseArray(deleted_json)
+                    .toJavaList(JSONObject.class)
+                    .stream()
+                    .collect(Collectors.toMap(
+                            rec -> rec.getString("detail_id") + rec.getString("flag"),
+                            rec -> rec,
+                            (first, later) -> first));
+
+            List<JSONObject> items = JSON.parseArray(json).toJavaList(JSONObject.class);
+            for (JSONObject item : items) {
+                String key = item.getString("detail_id") + item.getString("flag");
+                JSONObject match = deletedByKey.get(key);
+                if (match != null) {
+                    item.fluentPut("deleted", match.getString("deleted"));
+                }
+            }
+
+            List<JSONObject> unique = items.stream().distinct().collect(Collectors.toList());
+            return JSON.toJSONString(unique);
+        } catch (Exception e) {
+            // best effort: a malformed row yields null instead of failing the whole job
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    public static void main(String[] args) {
+        String json = "[{\"flag\":\"0\",\"date\":\"2016-11-18 00:00:00\",\"detail_id\":\"0bec8f848e0b78eafb9c3df5fd1381f7\",\"doc_type\":\"裁定书\"},{\"flag\":\"0\",\"date\":\"2016-11-18 00:00:00\",\"detail_id\":\"c32860be4264558793188f3a9c23dbb5\",\"doc_type\":\"裁定书\"}]";
+        String detail_json = "[{\"detail_id\":\"0bec8f848e0b78eafb9c3df5fd1381f7\",\"flag\":\"0\",\"deleted\":1},{\"detail_id\":\"c32860be4264558793188f3a9c23dbb5\",\"flag\":\"0\",\"deleted\":0},{\"detail_id\":\"c32860be4264558793188f3a9c23dbb5\",\"flag\":\"6\",\"deleted\":0}]";
+        get_content_update udf = new get_content_update();
+        System.out.println(udf.evaluate(json, detail_json));
+    }
+
+}

Datei-Diff unterdrückt, da er zu groß ist
+ 44 - 0
src/main/java/com/winhc/bigdata/udf/get_info_company.java


+ 1 - 2
src/main/java/com/winhc/bigdata/udf/get_legal_v2.java

@@ -15,7 +15,6 @@ import java.util.stream.Collectors;
  * 2021/8/30 16:57
  */
 public class get_legal_v2 extends UDF {
-    private static List<String> cols = Arrays.asList("amomon", "paymet", "time");
 
     public List<String> evaluate(String json) {
         if (StringUtils.isBlank(json) || "[]".equalsIgnoreCase(json)) return null;
@@ -26,7 +25,7 @@ public class get_legal_v2 extends UDF {
                 String id = toStringV2(m.getOrDefault("id", ""));
                 String deleted = toStringV2(m.getOrDefault("deleted", ""));
                 if (deleted.equals("0") || deleted.equals("1")) {
-                    return name + "@@" + id;
+                    return name + "@@" + id + "@@" + deleted;
                 } else return null;
             }).filter(StringUtils::isNotBlank).distinct()
                     .collect(Collectors.toList());

+ 33 - 0
src/main/java/com/winhc/bigdata/udf/get_legal_v3.java

@@ -0,0 +1,33 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class get_legal_v3 extends UDF {
+
+    public String evaluate(String json) {
+        if (StringUtils.isBlank(json) || "[]".equalsIgnoreCase(json)) return null;
+        List<JSONObject> list = JSON.parseArray(json).toJavaList(JSONObject.class);
+        List<String> re = list.stream().filter(x -> "0".equalsIgnoreCase(x.getString("deleted"))).map(x -> x.getString("name"))
+                .filter(org.apache.commons.lang3.StringUtils::isNotBlank).distinct().collect(Collectors.toList());
+        return JSON.toJSONString(re);
+    }
+
+
+    public static void main(String[] args) {
+        get_legal_v3 j = new get_legal_v3();
+        String json = "[{\"name\":\"冯金元111\",\"id\":\"pbc6c4f933e85850d1c401509877671b0\",\"type\":1,\"deleted\":0},{\"name\":\"冯金元\",\"id\":\"pbc6c4f933e85850d1c401509877671b0\",\"type\":1,\"deleted\":1},{\"name\":\"冯金元2\",\"id\":\"pbc6c4f933e85850d1c401509877671b0\",\"type\":1,\"deleted\":0}]";
+        String evaluate = j.evaluate(json);
+        System.out.println(evaluate);
+    }
+
+}

+ 62 - 0
src/main/java/com/winhc/bigdata/udf/get_sqrinfo.java

@@ -0,0 +1,62 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.fastjson.JSONPath;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.*;
+
+/**
+ * @Author: π
+ * @Description: 获取json 里字段
+ */
+public class get_sqrinfo extends UDF {
+
+    /**
+     * Returns the single named party found in {@code applicant_info}; if that input does not
+     * contain exactly one distinct name, falls back to the single named party in {@code yg_name}.
+     *
+     * @param applicant_info one or more JSON arrays joined by {@code &}
+     * @param yg_name        a JSON array used as the fallback source
+     * @return a JSON array holding the one {@code {name, litigant_id}} object, or {@code null}
+     *         when neither source yields exactly one name or any parsing error occurs
+     */
+    public String evaluate(String applicant_info, String yg_name) {
+        if (StringUtils.isBlank(applicant_info)) {
+            return null;
+        }
+        try {
+            Map<String, Object> applicants = new HashMap<>();
+            for (String segment : applicant_info.split("&")) {
+                parse(segment, applicants);
+            }
+            if (applicants.size() == 1) {
+                return JSONObject.toJSONString(applicants.values());
+            }
+
+            Map<String, Object> fallback = new HashMap<>();
+            parse(yg_name, fallback);
+            return fallback.size() == 1 ? JSONObject.toJSONString(fallback.values()) : null;
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    /**
+     * Parses {@code info} as a JSON array and stores one {@code {name, litigant_id}} object per
+     * non-blank name into {@code m1}, keyed by name (a later entry replaces an earlier one).
+     */
+    public void parse(String info, Map<String, Object> m1) {
+        if (StringUtils.isBlank(info)) return;
+        for (JSONObject party : JSON.parseArray(info, JSONObject.class)) {
+            String name = party.getString("name");
+            String litigantId = party.getString("litigant_id");
+            if (StringUtils.isNotBlank(name)) {
+                JSONObject slim = new JSONObject();
+                slim.fluentPut("name", name);
+                slim.fluentPut("litigant_id", litigantId);
+                m1.put(name, slim);
+            }
+        }
+    }
+
+    public static void main(String[] args) {
+        String jsons = "[{\"name\":\"张海林\",\"litigant_id\":\"\"},{\"name\":\"招商银行股份有限公司信用卡中心\",\"litigant_id\":\"cc45eeb0634f73531ba54ad55ba152a6\"}]&[{\"name\":\"上海浦东发展银行静安支行\",\"litigant_id\":\"c2c57fcc6398cfd49393c1a2d3c35a4c\"}]";
+        String yg_name = "[{\"name\":\"招商银行股份有限公司信用卡中心\",\"litigant_id\":\"cc45eeb0634f73531ba54ad55ba152a6\"}]";
+        get_sqrinfo udf = new get_sqrinfo();
+        System.out.println(udf.evaluate(jsons, yg_name));
+    }
+
+}
+
+

+ 2 - 3
src/main/java/com/winhc/bigdata/udf/get_url.java

@@ -12,7 +12,7 @@ import java.util.regex.Pattern;
  */
 public class get_url extends UDF {
 
-    Pattern pattern2 = Pattern.compile("^((http://)|(https://))?([a-zA-Z0-9]([a-zA-Z0-9\\-]{0,61}[a-zA-Z0-9])?\\.)+[a-zA-Z0-9]{2,6}(/)");
+    Pattern pattern2 = Pattern.compile("^((http:\\/\\/)|(https:\\/\\/))?([a-zA-Z0-9]([a-zA-Z0-9\\-]{0,61}[a-zA-Z0-9])?\\.)+[a-zA-Z0-9]{2,6}(/)");
     Pattern pattern1 = Pattern.compile("http.+?html|http.+?cn|http.+?com|http.+/");
 
     public String evaluate(String url) {
@@ -21,7 +21,6 @@ public class get_url extends UDF {
         }
         Matcher matcher = pattern2.matcher(url);
         if (matcher.find()) {
-            System.out.println(matcher.group());
             return matcher.group();
         }
         return "";
@@ -29,7 +28,7 @@ public class get_url extends UDF {
 
     public static void main(String[] args) {
         String str1 ="http://ssfw.zqdhfy.gov.cn/ktxx.aspx?cateId=15";
-        String str ="http://114.252.22.26.35/xx/yy/ktxx.aspx?cateId=15";
+        String str ="http://171.106.48.55:8899/legalsystem/ReportServer?_=1591608844498&__boxModel__=true&op=page_content&sessionID=1414&pn=779";
         String res = new get_url().evaluate(str);
         System.out.println(res);
 

+ 41 - 0
src/main/java/com/winhc/bigdata/udf/get_url2.java

@@ -0,0 +1,41 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang.StringUtils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Description: url 提取
+ */
+public class get_url2 extends UDF {
+
+
+
+    /**
+     * Cuts {@code url} down to everything up to and including the first "/" found at or after
+     * index 8 (the length of "https://"), i.e. past the scheme separator.
+     *
+     * @param url the URL text to shorten
+     * @return the leading part of the URL ending in "/", or "" when the input is blank,
+     *         no such "/" exists, or an exception is raised
+     */
+    public String evaluate(String url) {
+        if (StringUtils.isBlank(url)) {
+            return "";
+        }
+        try {
+            // indexOf yields -1 when nothing is found, which makes the substring empty.
+            int slashAt = url.indexOf("/", 8);
+            return url.substring(0, slashAt + 1);
+        } catch (Exception e) {
+            return "";
+        }
+    }
+
+    public static void main(String[] args) {
+        String str1 = "http://ssfw.zqdhfy.gov.cn/ktxx.aspx?cateId=15";
+        String str2 = "https://lnsfw.lnsfy.gov.cn//lawsuit/api/case-center/v1/third/court/external/getCourtAnnouncementInfo";
+        String str = "http://171.106.48.55:8899/legalsystem/ReportServer?_=1591608844498&__boxModel__=true&op=page_content&sessionID=1414&pn=779";
+        get_url2 udf = new get_url2();
+        System.out.println(udf.evaluate(str));
+        System.out.println(udf.evaluate(str1));
+        System.out.println(udf.evaluate(str2));
+
+    }
+}

+ 75 - 0
src/main/java/com/winhc/bigdata/udf/judge_number.java

@@ -0,0 +1,75 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+
+import java.util.*;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class judge_number extends UDF {
+
+    /**
+     * Tells whether the decimal digits of {@code amount} contain a run of identical adjacent
+     * digits that is longer than {@code count}, for amounts whose last digit is not 0.
+     *
+     * <p>Only runs of two or more digits are recorded, so an amount without any adjacent
+     * repeat has a longest run of 0.
+     *
+     * @param amount the number to inspect
+     * @param count  the run length that must be exceeded
+     * @return {@code false} when {@code amount} is null or not positive, {@code count} is null,
+     *         the amount has three digits or fewer, the last digit is 0, or no run is longer
+     *         than {@code count}; {@code true} otherwise
+     */
+    public Boolean evaluate(Long amount, Integer count) {
+        // A null count used to fail with a NullPointerException while unboxing; treat it as "no match".
+        if (amount == null || amount <= 0 || count == null) {
+            return false;
+        }
+
+        // amount is positive here, so the text consists of digits only.
+        String digits = amount.toString();
+        if (digits.length() <= 3) {
+            return false;
+        }
+
+        int longestRun = 0;
+        int currentRun = 1;
+        for (int i = 1; i < digits.length(); i++) {
+            if (digits.charAt(i) == digits.charAt(i - 1)) {
+                currentRun++;
+                longestRun = Math.max(longestRun, currentRun);
+            } else {
+                currentRun = 1;
+            }
+        }
+
+        boolean endsWithZero = digits.charAt(digits.length() - 1) == '0';
+        return !endsWithZero && count < longestRun;
+
+    }
+
+    public static void main(String[] args) {
+        judge_number j = new judge_number();
+        System.out.println(j.evaluate(19992252299999L, 3));
+        System.out.println(j.evaluate(100000100200L, 3));
+        System.out.println(j.evaluate(1000001002001L, 3));
+
+    }
+
+}

+ 71 - 0
src/main/java/com/winhc/bigdata/udf/judrisk/ExtractCompanyCase.java

@@ -0,0 +1,71 @@
+package com.winhc.bigdata.udf.judrisk;
+
+import com.aliyun.odps.udf.UDF;
+
+public class ExtractCompanyCase extends UDF {
+    // TODO define parameters and return type, e.g:  public String evaluate(String a, String b)
+    public String evaluate(String companyId, String ygName, String bgName) {
+        String ret = null;
+
+        if(ygName != null && ygName.contains(companyId)){
+            ret = "Y";
+        }else if(bgName != null && bgName.contains(companyId)){
+            ret = "B";
+        }else{
+            ret = "T";
+        }
+        return ret;
+    }
+
+
+    public String evaluate(String companyId, String ygName, String bgName, String isSuccess) {
+        String ret = null;
+
+        if(isSuccess != null){
+            if(ygName!=null && ygName.contains(companyId) ){
+                if(isSuccess.equals("胜")){
+                    ret = "胜";
+                }else if(isSuccess.equals("负")){
+                    ret = "负";
+                }else if(isSuccess.equals("平")){
+                    ret = "平";
+                }else{
+                    ret = "无";
+                }
+            }else if(bgName!= null && bgName.contains(companyId) ){
+                if(isSuccess.equals("胜")){
+                    ret = "负";
+                }else if(isSuccess.equals("负")){
+                    ret = "胜";
+                }else if(isSuccess.equals("平")){
+                    ret = "平";
+                }else {
+                    ret = "无";
+                }
+            }else{
+                ret = "无";
+            }
+        }else{
+            ret = "无";
+        }
+        return ret;
+    }
+
+
+
+    public String evaluate(String companyId, String yg_name, String bg_name, String ygLawyer, String bgLawyer) {
+        String ret = "";
+        if (yg_name!= null && yg_name.contains(companyId)) {
+            ret = ygLawyer;
+        } else if (bg_name!=null && bg_name.contains(companyId)) {
+            ret = bgLawyer;
+        } else {
+            ret = "";
+        }
+
+        return ret;
+    }
+
+
+
+}

+ 46 - 0
src/main/java/com/winhc/bigdata/udf/number_parse.java

@@ -0,0 +1,46 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class number_parse extends UDF {
+
+
+    public Boolean evaluate(String info) {
+        AtomicBoolean result = new AtomicBoolean(false);
+        if (StringUtils.isBlank(info)) {
+            return result.get();
+        }
+
+        JSONArray jj = JSON.parseArray(info);
+        jj.toJavaList(JSONObject.class)
+                .forEach(d -> {
+                    String source = d.getString("source");
+                    String is_phone = d.getString("is_phone");
+                    if (StringUtils.isNotBlank(source) && StringUtils.isNotBlank(is_phone)
+                            && !source.contains("年报") && is_phone.equals("1")
+                    ) {
+                        result.set(true);
+                    }
+                });
+        return result.get();
+    }
+
+    public static void main(String[] args) {
+        number_parse j = new number_parse();
+        System.out.println(j.evaluate("[{\"deleted\":\"0\",\"is_phone\":2,\"tel\":\"0576-82618583\",\"source\":\"2015年\"},{\"deleted\":\"0\",\"is_phone\":1,\"tel\":\"15968660117\",\"source\":\"2017年报\"},{\"deleted\":\"0\",\"is_phone\":2,\"tel\":\"15968660117\",\"source\":\"首商网\"}]"));
+        System.out.println(j.evaluate("[{\"deleted\":\"0\",\"is_phone\":2,\"tel\":\"0576-82618583\",\"source\":\"首商网\"},{\"deleted\":\"0\",\"is_phone\":1,\"tel\":\"15968660117\",\"source\":\"首商网\"}]"));
+        System.out.println(j.evaluate(""));
+
+    }
+
+}

+ 32 - 0
src/main/java/com/winhc/bigdata/udf/reg_capital_update.java

@@ -0,0 +1,32 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Description: 注册资本判断
+ */
+public class reg_capital_update extends UDF {
+    /** Matches every character outside the range U+4E00..U+9FA5. */
+    private static final Pattern NON_CHINESE = Pattern.compile("[^\\u4e00-\\u9fa5]");
+
+    /**
+     * Evaluates a registered-capital text together with its parsed amount.
+     *
+     * @param reg_capital        the capital text
+     * @param reg_capital_amount the parsed amount, may be null
+     * @return {@code false} for a blank text; {@code true} when nothing is left of the text after
+     *         dropping all characters outside U+4E00..U+9FA5 and the words "万元"/"万";
+     *         otherwise whether the amount is non-null and not greater than zero
+     */
+    public Boolean evaluate(String reg_capital, Long reg_capital_amount) {
+        if (StringUtils.isBlank(reg_capital)) {
+            return false;
+        }
+        String chineseOnly = NON_CHINESE.matcher(reg_capital).replaceAll("");
+        String withoutUnit = chineseOnly.replaceAll("(万元)|(万)", "");
+        if (withoutUnit.isEmpty()) {
+            return true;
+        }
+        if (reg_capital_amount == null) {
+            return false;
+        }
+        return reg_capital_amount <= 0L;
+    }
+
+    public static void main(String[] args) {
+        reg_capital_update udf = new reg_capital_update();
+        System.out.println(udf.evaluate("中国,人123,za.早上5元万美元", -5L));
+        System.out.println(udf.evaluate("", null));
+        System.out.println(udf.evaluate(null, null));
+    }
+}

+ 9 - 4
src/main/java/com/winhc/bigdata/udf/split_names.java

@@ -35,7 +35,11 @@ public class split_names extends UDF {
     }
 
     public void parse(String json, String json_path, Set<String> set) {
-        JSONArray jsonArray = ((JSONArray) JSONPath.eval(JSON.parse(json), json_path));
+        List<Object> eval = (List)JSONPath.eval(JSON.parse(json), json_path);
+        if(eval == null || eval.isEmpty()){
+            return;
+        }
+        JSONArray jsonArray = ((JSONArray) eval);
         for (Object s : jsonArray) {
             if (s != null && s.toString().length() > 0) {
                 set.add(s.toString());
@@ -45,10 +49,11 @@ public class split_names extends UDF {
 
     public static void main(String[] args) {
         //String jsons = "[{\"name\":\"张海林\",\"litigant_id\":\"\"},{\"name\":\"招商银行股份有限公司信用卡中心\",\"litigant_id\":\"cc45eeb0634f73531ba54ad55ba152a6\"}]&[{\"name\":\"上海浦东发展银行静安支行\",\"litigant_id\":\"c2c57fcc6398cfd49393c1a2d3c35a4c\"}]";
+        String jsons = "[{\"pledgor\": \"深圳市绿源半导体技术有限公司\", \"pledgor_id\": \"88d58a568f7c1694b6ef1c374de2e7c7\"}]&[{\"pledgee\": \"深圳市绿源半导体技术有限公司\", \"pledgee_id\": \"88d58a568f7c1694b6ef1c374de2e7c7\"}]&[{\"litigant_id\":\"88d58a568f7c1694b6ef1c374de2e7c7\"}]";
         //String jsons = "[{\"name\":\"南京颍州电气设备有限公司\",\"litigant_id\":\"\",\"date\":\"2010-06-22T00:00:00.000+08:00\",\"exec_money\":3.692949}]";
-        //String json_path = "$.exec_money";
-        String jsons = "[\"肖毅\",\"赵磊\"]";
-        String json_path = "$[*]";
+        String json_path = "$.pledgor_id";
+        //String jsons = "[\"肖毅\",\"赵磊\"]";
+        //String json_path = "$[*]";
         split_names n = new split_names();
         System.out.println(n.evaluate(jsons, json_path));
     }

Datei-Diff unterdrückt, da er zu groß ist
+ 31492 - 0
src/main/resources/all_court.txt