Browse Source

feat: add

许家凯 3 năm trước cách đây
mục cha
commit
94e3ab49c2

+ 5 - 0
pom.xml

@@ -63,6 +63,11 @@
             <version>1.2.72</version>
         </dependency>
 
+        <dependency>
+            <groupId>cn.hutool</groupId>
+            <artifactId>hutool-all</artifactId>
+            <version>5.6.6</version>
+        </dependency>
 
     </dependencies>
 

+ 102 - 0
src/main/java/com/winhc/bigdata/udf/JsonArrayIdAndNameSymmetricCheck.java

@@ -0,0 +1,102 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.fastjson.JSONPath;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.HashSet;
+import java.util.IntSummaryStatistics;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+
+/**
+ * @author: XuJiakai
+ * 2021/6/2 10:49
+ */
+public class JsonArrayIdAndNameSymmetricCheck extends UDF {
+    private static final Set<String> condition = new HashSet<String>() {
+        {
+            add("gt");
+            add("gte");
+            add("lt");
+            add("lte");
+            add("GT");
+            add("GTE");
+            add("LT");
+            add("LTE");
+        }
+    };
+
+    public Boolean evaluate(String val, String id_json_path, String name_json_path, String ignore_condition, Integer ignore_size) {
+        if (!condition.contains(ignore_condition)) {
+            throw new RuntimeException("请输入正确的表达式:gt,gte,lt,lte");
+        }
+        if (StringUtils.isEmpty(val)) {
+            return true;
+        }
+        try {
+            JSONArray jsonArray = JSON.parseArray(val);
+            IntSummaryStatistics collect = jsonArray.stream().map(o -> {
+                JSONObject jsonObject = ((JSONObject) o);
+                String id = (String) JSONPath.eval(jsonObject, id_json_path);
+                String name = (String) JSONPath.eval(jsonObject, name_json_path);
+
+                if (StringUtils.isEmpty(name) && StringUtils.isNotBlank(id)) {
+                    throw new RuntimeException("存在name为空,但id有值数据,请手动排查!");
+                }
+                if (StringUtils.isEmpty(name)) {
+                    return null;
+                }
+                if (StringUtils.isNotBlank(id)) {
+                    return null;
+                }
+
+                if (isContinue(name, ignore_condition, ignore_size)) {
+                    return null;
+                }
+                return 1;
+            }).filter(Objects::nonNull).collect(Collectors.summarizingInt(x -> x));
+            long sum = collect.getSum();
+            return sum == 0;
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private Boolean isContinue(String name, String ignore_condition, int ignore_size) {
+        switch (ignore_condition.toUpperCase()) {
+            case "GT":
+                if (name.length() > ignore_size) {
+                    return true;
+                }
+            case "GTE":
+                if (name.length() >= ignore_size) {
+                    return true;
+                }
+            case "LT":
+                if (name.length() < ignore_size) {
+                    return true;
+                }
+            case "LTE":
+                if (name.length() <= ignore_size) {
+                    return true;
+                }
+            default:
+                return false;
+        }
+    }
+
+    public static void main(String[] args) {
+        JsonArrayIdAndNameSymmetricCheck
+                check = new JsonArrayIdAndNameSymmetricCheck();
+        Boolean lte = check.evaluate("[{\"pledgee\":\"巴东恒信担保有限公司\",\"pledgee_id\":\"\"},{\"pledgor\":\"汪建银\",\"pledgor_id\":\"\"}]"
+                , "$.pledgee_id", "$.pledgee", "lte", 3);
+
+        System.out.println(lte);
+    }
+}

+ 47 - 0
src/main/java/com/winhc/bigdata/udf/JudicialCaseRelationEqu.java

@@ -0,0 +1,47 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.utils.CaseConnectUtils;
+import org.apache.commons.lang.StringUtils;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * ODPS UDF wrapper around {@link CaseConnectUtils#isConnect}: normalises the raw
+ * column values and delegates the actual comparison.
+ *
+ * @author: XuJiakai
+ * 2021/6/8 11:48
+ */
+public class JudicialCaseRelationEqu extends UDF {
+    /** Shared placeholder used when a party column is null or empty. */
+    private static final Set<String> emptySet = new HashSet<>();
+
+    /**
+     * Decides whether two cases are related.
+     *
+     * @param currentCaseParty parties of the current case, joined with the {@code \001} separator
+     * @param connectCaseParty parties of the candidate case, joined with the {@code \001} separator
+     * @param currentCaseNo    case number of the current case; {@code null} is treated as ""
+     * @param connectCaseNo    case number of the candidate case; {@code null} is treated as ""
+     * @param currentCourtName court of the current case; {@code null} is treated as ""
+     * @param connectCourtName court of the candidate case; {@code null} is treated as ""
+     * @return the verdict returned by {@code CaseConnectUtils.isConnect}
+     */
+    public Boolean evaluate(String currentCaseParty, String connectCaseParty
+            , String currentCaseNo, String connectCaseNo
+            , String currentCourtName, String connectCourtName) {
+        return CaseConnectUtils.isConnect(
+                toPartySet(currentCaseParty),
+                toPartySet(connectCaseParty),
+                nullToEmpty(currentCaseNo),
+                nullToEmpty(connectCaseNo),
+                nullToEmpty(currentCourtName),
+                nullToEmpty(connectCourtName));
+    }
+
+    /** Splits a {@code \001}-joined party string into a set; null/empty input yields the shared empty set. */
+    private static Set<String> toPartySet(String joinedParties) {
+        if (StringUtils.isEmpty(joinedParties)) {
+            return emptySet;
+        }
+        return new HashSet<String>(Arrays.asList(joinedParties.split("\001")));
+    }
+
+    /** Replaces a null string with the empty string. */
+    private static String nullToEmpty(String value) {
+        return value == null ? "" : value;
+    }
+}

+ 30 - 0
src/main/java/com/winhc/bigdata/udf/ReplaceChinese2English.java

@@ -0,0 +1,30 @@
+package com.winhc.bigdata.udf;
+
+import cn.hutool.core.convert.Convert;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+/**
+ * ODPS UDF that normalises Chinese/full-width punctuation in a string to its
+ * ASCII counterpart.
+ *
+ * @author: XuJiakai
+ * 2021/6/1 11:24
+ */
+public class ReplaceChinese2English extends UDF {
+    /**
+     * Converts the value with {@code Convert.toDBC}, trims it and replaces the
+     * remaining Chinese punctuation marks with ASCII ones.
+     *
+     * @param val text to normalise
+     * @return the normalised text, or {@code null} when {@code val} is null or empty
+     */
+    public String evaluate(String val) {
+        if (StringUtils.isEmpty(val)) {
+            return null;
+        }
+        val = Convert.toDBC(val).trim();
+        // The full-width source characters are written as Unicode escapes so they cannot be
+        // folded into their ASCII twins by an editor or re-encoding step, which would turn
+        // each call into a no-op and, for the quote, into an unterminated literal.
+        // NOTE(review): Convert.toDBC presumably already maps the U+FF01..U+FF5E range to
+        // half-width, making the full-width replacements purely defensive - confirm.
+        val = val.replace("\uFF0C", ",")   // full-width comma
+                .replace("\uFF08", "(")    // full-width left parenthesis
+                .replace("\uFF09", ")")    // full-width right parenthesis
+                .replace("。", ".")
+                .replace("\uFF1B", ";")    // full-width semicolon
+                .replace("\uFF01", "!")    // full-width exclamation mark
+                .replace("\uFF1F", "?")    // full-width question mark
+                .replace("\uFF1A", ":")    // full-width colon
+                .replace("\uFF02", "\"")   // full-width quotation mark
+                .replace("“", "\"")
+                .replace("”", "\"");
+        return val;
+    }
+}

+ 25 - 0
src/main/java/com/winhc/bigdata/udf/ToOrdinalStr.java

@@ -0,0 +1,25 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * ODPS UDF that concatenates its non-blank arguments in natural (sorted) string order,
+ * so the result does not depend on the order in which the arguments were passed.
+ *
+ * @author: XuJiakai
+ * 2021/6/11 17:26
+ */
+public class ToOrdinalStr extends UDF {
+    /**
+     * Joins all non-blank inputs, sorted ascending, without any separator.
+     *
+     * @param str   first value
+     * @param other further values
+     * @return the sorted concatenation; the empty string when every input is blank
+     */
+    public String evaluate(String str, String... other) {
+        List<String> parts = new ArrayList<>(other.length + 1);
+        for (String candidate : other) {
+            if (StringUtils.isNotBlank(candidate)) {
+                parts.add(candidate);
+            }
+        }
+        if (StringUtils.isNotBlank(str)) {
+            parts.add(str);
+        }
+        // A null comparator requests the natural ordering of the strings.
+        parts.sort(null);
+        return String.join("", parts);
+    }
+}

+ 91 - 0
src/main/java/com/winhc/bigdata/utils/CaseConnectUtils.java

@@ -0,0 +1,91 @@
+package com.winhc.bigdata.utils;
+
+import cn.hutool.core.util.NumberUtil;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang.StringUtils;
+
+import java.util.*;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * Heuristics for deciding whether two judicial cases are related, based on their
+ * party names, case numbers and court names.
+ *
+ * @author: XuJiakai
+ * 2021/6/8 13:36
+ */
+public class CaseConnectUtils {
+    /**
+     * Characters treated as anonymisation placeholders inside party names. The full-width
+     * asterisk (U+FF0A) and full-width small x (U+FF58) are written as escapes so that they
+     * stay distinct from their ASCII twins, which are listed as well.
+     */
+    private static final Pattern vagueWordPat = Pattern.compile("[某*\uFF0Axⅹ\uFF58X×]");
+
+    /**
+     * Decides whether two cases are connected.
+     *
+     * <p>Party names are masked (placeholder characters become {@code \002}) and grouped by
+     * their first character. The per-group match counts are summed and compared with the
+     * size of the smaller party set:
+     * <ul>
+     *   <li>all parties of the smaller, non-empty side match: connected;</li>
+     *   <li>at least half match (this includes the case of an empty side): connected only
+     *       if the case numbers or the court names are equal;</li>
+     *   <li>otherwise: not connected.</li>
+     * </ul>
+     *
+     * @param currentCasePartyList party names of the current case (must not be null)
+     * @param connectCasePartyList party names of the candidate case (must not be null)
+     * @param currentCaseNo        case number of the current case (must not be null)
+     * @param connectCaseNo        case number of the candidate case
+     * @param currentCourtName     court name of the current case (must not be null)
+     * @param connectCourtName     court name of the candidate case
+     * @return {@code true} when the cases are considered connected
+     */
+    public static Boolean isConnect(Set<String> currentCasePartyList, Set<String> connectCasePartyList
+            , String currentCaseNo, String connectCaseNo
+            , String currentCourtName, String connectCourtName) {
+
+        Map<String, Set<String>> currentCasePartyMap = groupByFirstChar(currentCasePartyList);
+        Map<String, Set<String>> connectCasePartyMap = groupByFirstChar(connectCasePartyList);
+
+        double matchNum = 0;
+        for (Map.Entry<String, Set<String>> group : currentCasePartyMap.entrySet()) {
+            Set<String> counterpart = connectCasePartyMap.getOrDefault(group.getKey(), Collections.<String>emptySet());
+            matchNum += nameEqu(group.getValue(), counterpart);
+        }
+
+        // Groups are disjoint (they differ in the first character), so the number of
+        // distinct names on each side is the sum of the group sizes.
+        int min = Math.min(countNames(currentCasePartyMap), countNames(connectCasePartyMap));
+
+        if (matchNum == min && min != 0) {
+            return true;
+        } else if (NumberUtil.compare(matchNum, NumberUtil.div(min, 2)) >= 0) {
+            return caseNoMatch(currentCaseNo, connectCaseNo) || courtNameMatch(currentCourtName, connectCourtName);
+        } else {
+            return false;
+        }
+    }
+
+    /** Drops blank names, masks placeholder characters with {@code \002} and groups by first character. */
+    private static Map<String, Set<String>> groupByFirstChar(Set<String> partyNames) {
+        return partyNames.stream().filter(StringUtils::isNotBlank)
+                .map(s -> vagueWordPat.matcher(s).replaceAll("\002"))
+                .collect(Collectors.groupingBy(s -> s.substring(0, 1), Collectors.toSet()));
+    }
+
+    /** Total number of names held in all groups of the map. */
+    private static int countNames(Map<String, Set<String>> grouped) {
+        int total = 0;
+        for (Set<String> names : grouped.values()) {
+            total += names.size();
+        }
+        return total;
+    }
+
+    /**
+     * Counts how many names of two same-initial groups are considered equal. When either
+     * group contains a masked name, exact comparison is impossible and the size of the
+     * smaller group is used; otherwise the size of the exact intersection is returned.
+     */
+    private static double nameEqu(Set<String> nameSet1, Set<String> nameSet2) {
+        if (nameSet1.isEmpty() || nameSet2.isEmpty()) {
+            return 0;
+        }
+        boolean nameSet1Match = nameSet1.stream().anyMatch(s -> s.contains("\002"));
+        boolean nameSet2Match = nameSet2.stream().anyMatch(s -> s.contains("\002"));
+        if (nameSet1Match || nameSet2Match) {
+            return Math.min(nameSet1.size(), nameSet2.size());
+        } else {
+            return CollectionUtils.intersection(nameSet1, nameSet2).size();
+        }
+    }
+
+    /** Exact equality of the two case numbers. */
+    private static Boolean caseNoMatch(String currentCaseNo, String connectCaseNo) {
+        return currentCaseNo.equals(connectCaseNo);
+    }
+
+    /** Exact equality of the two court names. */
+    private static Boolean courtNameMatch(String currentCourtName, String connectCourtName) {
+        return currentCourtName.equals(connectCourtName);
+    }
+
+    /** Manual smoke run with two sample party sets; prints the verdict. */
+    public static void main(String[] args) {
+        Set<String> currentCasePartyList = new HashSet<String>() {
+            {
+                add("广西南宁伟联行置业有限公司");
+                add("广西美满园房地产开发有限公司");
+                add("李桂香");
+            }
+        };
+        Set<String> connectCasePartyList = new HashSet<String>() {
+            {
+                add("广西南宁伟联行置业有限公司");
+                add("广西美满园房地产开发有限公司");
+                add("陈世赞");
+            }
+        };
+        String currentCaseNo = "(2021)浙02执22号";
+        String connectCaseNo = "(2021)浙02执22号";
+        String currentCourtName = "浙江省宁波市中级人民法院";
+        String connectCourtName = "浙江省宁波市中级人民法院";
+
+        System.out.println(isConnect(currentCasePartyList, connectCasePartyList, currentCaseNo, connectCaseNo, currentCourtName, connectCourtName));
+    }
+}