xufei пре 1 година
родитељ
комит
119cbf1c29

+ 39 - 0
src/main/java/com/winhc/bigdata/bean/EntityInfo.java

@@ -0,0 +1,39 @@
+package com.winhc.bigdata.bean;
+
+
+/**
+ * Mutable bean holding an entity's key number, name and type id.
+ */
+public class EntityInfo {
+    private String keyno;
+    private String name;
+    private String type_id;
+
+    /**
+     * Creates a fully populated entity.
+     *
+     * @param keyno   value stored as the key number
+     * @param name    value stored as the name
+     * @param type_id value stored as the type id
+     */
+    public EntityInfo(String keyno, String name, String type_id) {
+        this.name = name;
+        this.type_id = type_id;
+        this.keyno = keyno;
+    }
+
+    /** @return the stored key number */
+    public String getKeyno() {
+        return this.keyno;
+    }
+
+    /** @param value new key number */
+    public void setKeyno(String value) {
+        this.keyno = value;
+    }
+
+    /** @return the stored name */
+    public String getName() {
+        return this.name;
+    }
+
+    /** @param value new name */
+    public void setName(String value) {
+        this.name = value;
+    }
+
+    /** @return the stored type id */
+    public String getType_id() {
+        return this.type_id;
+    }
+
+    /** @param value new type id */
+    public void setType_id(String value) {
+        this.type_id = value;
+    }
+}

+ 1 - 1
src/main/java/com/winhc/bigdata/udf/JsonArray2ListUdf.java

@@ -32,7 +32,7 @@ public class JsonArray2ListUdf extends UDF {
 
     public static void main(String[] args) {
         JsonArray2ListUdf jsonArray2ListUdf = new JsonArray2ListUdf();
-        List<String> evaluate = jsonArray2ListUdf.evaluate("[{\"show\":\"辽阳文圣仪表厂\",\"value\":\"辽阳文圣仪表厂\"}]", "$.value");
+        List<String> evaluate = jsonArray2ListUdf.evaluate("[{\"show\":\"辽阳文圣仪表厂\",\"value\":\"辽阳文圣仪表厂\"},{\"show\":\"辽阳文圣仪表厂\",\"value\":\"辽阳文圣仪表厂1\"}]", "$.value");
         System.out.println(evaluate);
     }
 

+ 44 - 0
src/main/java/com/winhc/bigdata/udf/NameMerge.java

@@ -0,0 +1,44 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * UDF that merges a current name and a separator-delimited list of further names
+ * into one de-duplicated list.
+ *
+ * @author: π
+ * 2020/12/16 16:06
+ */
+public class NameMerge extends UDF {
+
+    /** Delimiter between entries of the {@code names} argument (tab, semicolon, tab). */
+    private static final String NAME_SEPARATOR = "\t;\t";
+
+    /**
+     * Merges {@code cname} and the entries of {@code names} into a list without duplicates.
+     * Each candidate is normalised first, and blank results are dropped afterwards, so an
+     * input that consists only of separator residue never yields an empty entry.
+     *
+     * @param cname current name; ignored when blank
+     * @param names further names joined by {@code "\t;\t"}; ignored when blank
+     * @return normalised, de-duplicated names with {@code cname} first, followed by the
+     *         entries of {@code names} in input order; never {@code null}
+     */
+    public List<String> evaluate(String cname, String names) {
+        // LinkedHashSet de-duplicates while keeping a stable, input-driven order.
+        Set<String> merged = new java.util.LinkedHashSet<>();
+        addNormalized(merged, cname);
+        if (org.apache.commons.lang3.StringUtils.isNotBlank(names)) {
+            for (String part : names.split(NAME_SEPARATOR)) {
+                addNormalized(merged, part);
+            }
+        }
+        return new java.util.ArrayList<>(merged);
+    }
+
+    /** Normalises one raw name and adds it to {@code target} unless the result is blank. */
+    private static void addNormalized(Set<String> target, String raw) {
+        if (org.apache.commons.lang3.StringUtils.isBlank(raw)) {
+            return;
+        }
+        // NOTE(review): as written, the two parenthesis replacements map a character to itself;
+        // presumably they were meant to convert between full-width and half-width forms - confirm.
+        String cleaned = raw.replaceAll("\t;", "")
+                .replaceAll("[(]", "(")
+                .replaceAll("[)]", ")")
+                .trim();
+        // Re-check after cleaning: stripping "\t;" and trimming can leave nothing behind.
+        if (org.apache.commons.lang3.StringUtils.isNotBlank(cleaned)) {
+            target.add(cleaned);
+        }
+    }
+
+    /** Manual smoke run with a sample name and a sample history string. */
+    public static void main(String[] args) {
+        String name = "海美電子有限公司";
+        String historyName = "	;	陕西十冶投资开发有限公司";
+
+        List<String> evaluate = new NameMerge().evaluate(name, historyName);
+        System.out.println(evaluate);
+    }
+}

+ 40 - 0
src/main/java/com/winhc/bigdata/udf/Str2Json.java

@@ -0,0 +1,40 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONPath;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * @author: π
+ * 2021/4/12 16:57
+ */
+public class Str2Json extends UDF {
+    public List<String> evaluate(String case_no) {
+
+        if (StringUtils.isBlank(case_no)) {
+            return Arrays.asList("0",null);
+        }
+        try {
+            JSON.parseArray(case_no);
+            return Arrays.asList("0",case_no);
+        } catch (Exception e) {
+            //e.printStackTrace();
+        }
+        return Arrays.asList("1",JSON.toJSONString(case_no.split("\n")));
+
+    }
+
+    public static void main(String[] args) {
+        Str2Json j = new Str2Json();
+        System.out.println(j.evaluate("(2021)鄂0104民初3635号\n(2021)鄂0104民初3634号"));
+        System.out.println(j.evaluate("[\"(2019)晋0525民初523号\",\"(2019)晋0525民初524号\"]"));
+        System.out.println(j.evaluate(null));
+    }
+
+}

+ 54 - 0
src/main/java/com/winhc/bigdata/udf/getAllName.java

@@ -0,0 +1,54 @@
+package com.winhc.bigdata.udf;
+
+import cn.hutool.json.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * UDF that collects the distinct names found in one or more {@code &}-separated JSON arrays,
+ * keeping only entries whose {@code litigant_id} is exactly 32 characters long.
+ *
+ * @author: π
+ * 2020/12/16 16:06
+ */
+public class getAllName extends UDF {
+
+    /**
+     * @param json JSON arrays of objects, joined by {@code "&"}
+     * @return the accepted names joined by {@code ","}, or {@code null} for {@code null} input
+     */
+    public String evaluate(String json) {
+        if (json == null) {
+            return null;
+        }
+        HashMap<String, Map<String, String>> collected = new HashMap<>();
+        for (String chunk : json.split("&")) {
+            parse(JSONObject.parseArray(chunk), collected, "name");
+        }
+        return String.join(",", collected.keySet());
+    }
+
+    /**
+     * Adds the {@code key} value of every accepted element of {@code name} to {@code m} as a
+     * map key (the mapped value is always {@code null}). An element is accepted when its
+     * {@code key} value is not blank and its {@code litigant_id} is 32 characters long.
+     *
+     * @param name array of JSON objects; nothing happens when {@code null}
+     * @param m    accumulator; nothing happens when {@code null}
+     * @param key  field whose value is recorded
+     */
+    public static void parse(JSONArray name, HashMap<String, Map<String, String>> m, String key) {
+        if (name == null || m == null) {
+            return;
+        }
+        for (Object element : name) {
+            JSONObject item = (JSONObject) element;
+            String partyName = getKey(item, key);
+            String litigantId = getKey(item, "litigant_id");
+            boolean hasName = StringUtils.isNotBlank(partyName);
+            boolean hasFullId = StringUtils.isNotBlank(litigantId) && litigantId.length() == 32;
+            if (hasName && hasFullId) {
+                m.put(partyName, null);
+            }
+        }
+    }
+
+    /**
+     * @param j   object to read from; may be {@code null}
+     * @param key field name
+     * @return the field's {@code toString()} value, or {@code ""} when the object or field is absent
+     */
+    public static String getKey(JSONObject j, String key) {
+        Object value = (j == null) ? null : j.get(key);
+        return Objects.toString(value, "");
+    }
+
+    /** Manual smoke run with several {@code &}-joined sample arrays. */
+    public static void main(String[] args) {
+        //String name = "[{\"name\":\"中国建设银行股份有限公司济宁古槐路支行\",\"party_title\":\"原告\",\"litigant_id\":\"ac33212c6b434ee51a42c0a3df5cc898\"},{\"name\":\"zhangsan\",\"party_title\":\"原告\",\"litigant_id\":\"1ac33212c6b434ee51a42c0a3df5cc898\"}]";
+        String name = "[{\"litigant_id\":\"\",\"name\":\"刘琳\"}]&[{\"litigant_id\":\"54fe548645a86afe794e7fe1655d1c39\",\"name\":\"山东恒鑫伟业胶带有限公司\"}]&[{\"litigant_id\":\"dd7df761fe4e6112132bedbc4d4b103d\",\"name\":\"山东德福食品有限公司\"}]&[{\"litigant_id\":\"p585bc2cf9308fc6c3483617e6a1026d2\",\"name\":\"王强\"}]&[{\"litigant_id\":\"p899391fb5b11857c635a7ae52e7fb5a9\",\"name\":\"汪凤家\"}]";
+        String result = new getAllName().evaluate(name);
+        System.out.println(result);
+    }
+}

+ 51 - 0
src/main/java/com/winhc/bigdata/udf/getBizDate.java

@@ -0,0 +1,51 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import static com.winhc.bigdata.utils.BigDecimalUtil.div;
+
+/**
+ * UDF that normalises a date-like text to its leading {@code yyyy-MM-dd} part.
+ *
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ */
+public class getBizDate extends UDF {
+
+    /**
+     * Replaces the characters 年, 月 and "/" with "-" and removes 日, then keeps the first ten
+     * characters if they form a valid {@code yyyy-MM-dd} date.
+     *
+     * @param date raw date text
+     * @return the ten-character date, or {@code null} when the input is blank, shorter than
+     *         ten characters after normalisation, or not a valid date
+     */
+    public String evaluate(String date) {
+        if (StringUtils.isBlank(date)) {
+            return null;
+        }
+        String normalized = date
+                .replace("年", "-")
+                .replace("月", "-")
+                .replace("日", "")
+                .replace("/", "-");
+        if (normalized.length() < 10) {
+            return null;
+        }
+        String day = normalized.substring(0, 10);
+        return isLegalDate(day) ? day : null;
+    }
+
+    /**
+     * Checks a date text by parsing it and formatting it back; the text is legal only when
+     * the round trip reproduces it exactly. Ten-character input is read as
+     * {@code yyyy-MM-dd}, anything else as {@code yyyy-MM-dd HH:mm:ss}.
+     *
+     * @param date text to check
+     * @return {@code true} when the round trip matches, {@code false} otherwise or on any error
+     */
+    public static final Boolean isLegalDate(String date) {
+        try {
+            String pattern = date.length() == 10 ? "yyyy-MM-dd" : "yyyy-MM-dd HH:mm:ss";
+            SimpleDateFormat format = new SimpleDateFormat(pattern);
+            Date parsed = format.parse(date);
+            String roundTrip = format.format(parsed);
+            return date.equals(roundTrip);
+        } catch (Exception e) {
+            return false;
+        }
+    }
+
+    /** Manual smoke run with a sample timestamp. */
+    public static void main(String[] args) {
+        getBizDate udf = new getBizDate();
+        System.out.println(udf.evaluate("2022-10-15 00:20:35"));
+    }
+}
+
+

+ 37 - 0
src/main/java/com/winhc/bigdata/udf/getEntity.java

@@ -0,0 +1,37 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.bean.EntityInfo;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * UDF that converts a JSON array of objects into a JSON array of {@link EntityInfo} records.
+ *
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ */
+public class getEntity extends UDF {
+
+    /**
+     * @param entity   JSON array of objects
+     * @param id_key   field read as the entity's key number
+     * @param name_key field read as the entity's name
+     * @param type     value stored as the type id of every produced entity
+     * @return the JSON serialisation of the produced entities, or {@code "[]"} for blank input
+     */
+    public String evaluate(String entity, String id_key, String name_key, String type) {
+        if (StringUtils.isBlank(entity)) {
+            return "[]";
+        }
+        List<JSONObject> rows = JSON.parseArray(entity).toJavaList(JSONObject.class);
+        List<EntityInfo> infos = rows.stream()
+                .map(row -> new EntityInfo(row.getString(id_key), row.getString(name_key), type))
+                .collect(Collectors.toList());
+        return JSON.toJSONString(infos);
+    }
+
+    /** Manual smoke run with a single sample entity. */
+    public static void main(String[] args) {
+        getEntity udf = new getEntity();
+        System.out.println(udf.evaluate("[{\"name\": \"中国民用航空总局第二研究所\", \"keyno\": \"e8365c0f59fa889641ac24e5d53e260a\"}]", "keyno", "name", "1"));
+    }
+}
+
+

+ 83 - 0
src/main/java/com/winhc/bigdata/udf/get_case_amt.java

@@ -0,0 +1,83 @@
+package com.winhc.bigdata.udf;
+
+import cn.hutool.core.util.StrUtil;
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * UDF that determines a case amount from execution and judgment information.
+ *
+ * @author: π
+ * 2021/8/30 16:57
+ * Sample inputs:
+ * judge_info [{"amt":30.492,"case_stage":"恢复执行"},{"amt":30.0,"case_stage":"民事一审"}]
+ * exec_info  [{"date":"2022-06-14 00:00:00","exec_money":6.18,"litigant_id":"a87cd9b6bc6e11be1681fe5ab0166dae","name":"云南尚多购商贸有限公司"},{"date":"2022-06-14 00:00:00","exec_money":6.18,"litigant_id":"b5570cbb03c2ce1cec47a95a5e281c99","name":"通海尚多购商贸有限公司"}]
+ */
+public class get_case_amt extends UDF {
+
+    /**
+     * Picks the {@code exec_money} of the execution entry with the greatest {@code date}
+     * string; if that is not positive, falls back to the {@code amt} of the judgment entry
+     * whose {@code case_stage} has the highest {@link #stageWeight(String)}.
+     *
+     * <p>A blank input on one side is treated as an empty array, entries without a
+     * {@code date} sort after dated ones, and a missing amount counts as 0, so none of
+     * these discards the value available from the other side.
+     *
+     * @param exec_info  JSON array of execution entries; may be blank
+     * @param judge_info JSON array of judgment entries; may be blank
+     * @return the chosen positive amount, or {@code null} when both inputs are blank, no
+     *         positive amount exists, or parsing fails
+     */
+    public Double evaluate(String exec_info, String judge_info) {
+        if (StringUtils.isBlank(exec_info) && StringUtils.isBlank(judge_info)) return null;
+        try {
+            // Latest date first; reversing nullsFirst places undated entries last.
+            Comparator<JSONObject> latestFirst = Comparator
+                    .comparing((JSONObject row) -> row.getString("date"),
+                            Comparator.nullsFirst(Comparator.<String>naturalOrder()))
+                    .reversed();
+            // Heaviest stage first; the stable sort keeps input order among equal weights.
+            Comparator<JSONObject> heaviestStageFirst = Comparator
+                    .comparing((JSONObject row) -> stageWeight(row.getString("case_stage")))
+                    .reversed();
+
+            double exec_amt = firstAmount(exec_info, "exec_money", latestFirst);
+            double judge_amt = firstAmount(judge_info, "amt", heaviestStageFirst);
+
+            if (exec_amt > 0) {
+                return exec_amt;
+            }
+            return judge_amt > 0 ? judge_amt : null;
+        } catch (Exception e) {
+            // Malformed JSON or unexpected element types: report "no amount".
+            return null;
+        }
+
+    }
+
+    /**
+     * Parses {@code json} as an array of objects, orders it with {@code order} and returns
+     * the {@code amountKey} value of the first element, or 0 when there is none.
+     */
+    private static double firstAmount(String json, String amountKey, Comparator<JSONObject> order) {
+        // A blank (not only null) text must become "[]"; fastjson 1.x appears to return
+        // null for empty text, which would fail on the next call - TODO confirm.
+        List<JSONObject> rows = JSON.parseArray(StringUtils.isBlank(json) ? "[]" : json)
+                .toJavaList(JSONObject.class);
+        return rows.stream()
+                .sorted(order)
+                .mapToDouble(row -> {
+                    Double amount = row.getDouble(amountKey);
+                    // A missing amount must not throw on unboxing.
+                    return amount == null ? 0D : amount;
+                })
+                .findFirst()
+                .orElse(0);
+    }
+
+    /**
+     * Ranks a case stage by the first matching marker character, checked in this order:
+     * 执 = 5, 再 = 4, 二 = 3, 一 = 2; any other non-blank name = 1, blank = 0.
+     *
+     * @param stage_name case stage text
+     * @return the weight, higher meaning preferred
+     */
+    public Integer stageWeight(String stage_name) {
+        if (StringUtils.isBlank(stage_name)) return 0;
+        if (StrUtil.containsAny(stage_name, "执")) {
+            return 5;
+        }
+        if (StrUtil.containsAny(stage_name, "再")) {
+            return 4;
+        }
+        if (StrUtil.containsAny(stage_name, "二")) {
+            return 3;
+        }
+        if (StrUtil.containsAny(stage_name, "一")) {
+            return 2;
+        }
+        return 1;
+    }
+
+
+    /** Manual smoke run with a null exec_info and a sample judge_info. */
+    public static void main(String[] args) {
+        get_case_amt j = new get_case_amt();
+        //String exec_info = "[{\"date\":\"2022-07-15 00:00:00\",\"exec_money\":8.88,\"litigant_id\":\"11\",\"name\":\"云南尚多购商贸有限公司\"},{\"date\":\"2022-06-14 00:00:00\",\"exec_money\":9.99,\"litigant_id\":\"22\",\"name\":\"通海尚多购商贸有限公司\"}]";
+        String exec_info = null;
+        String judge_info = "[{\"amt\":30.492,\"case_stage\":\"恢复执行\"},{\"amt\":30.0,\"case_stage\":\"民事一审\"}]";
+        Double e = j.evaluate(exec_info, judge_info);
+        System.out.println(e);
+    }
+
+}

+ 42 - 0
src/main/java/com/winhc/bigdata/udf/get_draws.java

@@ -0,0 +1,42 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * UDF that normalises the "draws" field: a JSON array of strings becomes one
+ * comma-separated string, while a plain URL passes through unchanged.
+ *
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ */
+public class get_draws extends UDF {
+
+    /**
+     * @param draw either a value starting with {@code "http"} or a JSON array of strings
+     * @return {@code draw} itself when it starts with {@code "http"}; otherwise the distinct
+     *         non-blank array elements joined by {@code ","}; {@code null} when the input is
+     *         empty, the array is empty, or parsing fails
+     */
+    public String evaluate(String draw) {
+        if (StringUtils.isEmpty(draw)) {
+            return null;
+        }
+        if (draw.startsWith("http")) {
+            return draw;
+        }
+
+        List<String> urls;
+        try {
+            urls = JSON.parseArray(draw, String.class);
+            if (urls.isEmpty()) {
+                return null;
+            }
+        } catch (Exception e) {
+            return null;
+        }
+
+        return urls.stream()
+                .filter(StringUtils::isNotBlank)
+                .distinct()
+                .collect(Collectors.joining(","));
+    }
+
+    /** Manual smoke run with a JSON array and with a bare URL. */
+    public static void main(String[] args) {
+        get_draws udf = new get_draws();
+        System.out.println(udf.evaluate("[\"http://static.tianyancha.com/patent/abstractPic/CN/U/212/509/CN212509215U_dest_path_EDA0002847163960000011.png\", \"http://static.tianyancha.com/patent/abstractPic/CN/U/212/509/CN212509215U.png\"]"));
+        System.out.println(udf.evaluate("http://static.tianyancha.com/patent/abstractPic/CN/U/212/509/CN212509215U_dest_path_EDA0002847163960000011.png"));
+    }
+}
+
+

+ 49 - 0
src/main/java/com/winhc/bigdata/udf/get_entity.java

@@ -0,0 +1,49 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * UDF that turns a JSON array of objects into distinct {@code value1@@value2} strings.
+ *
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class get_entity extends UDF {
+
+    /**
+     * @param json JSON array of objects
+     * @param key1 field whose value goes before {@code "@@"}
+     * @param key2 field whose value goes after {@code "@@"}
+     * @return the distinct pairs in input order, or {@code null} when the input is blank,
+     *         equals {@code "[]"}, yields no pairs, or cannot be parsed
+     */
+    public List<String> evaluate(String json, String key1, String key2) {
+        if (StringUtils.isBlank(json) || "[]".equalsIgnoreCase(json)) {
+            return null;
+        }
+        try {
+            List<Map> rows = JSON.parseArray(json, Map.class);
+            List<String> pairs = rows.stream()
+                    .map(row -> joinPair(row, key1, key2))
+                    // Every pair contains "@@", so this filter never rejects anything.
+                    .filter(StringUtils::isNotBlank)
+                    .distinct()
+                    .collect(Collectors.toList());
+            return pairs.isEmpty() ? null : pairs;
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        }
+
+    }
+
+    /** Builds {@code value1@@value2} for one row; an absent key contributes an empty string. */
+    private static String joinPair(Map row, String key1, String key2) {
+        String first = toStringV2(row.getOrDefault(key1, ""));
+        String second = toStringV2(row.getOrDefault(key2, ""));
+        return first + "@@" + second;
+    }
+
+    /**
+     * @param o any object, possibly {@code null}
+     * @return {@code o.toString()}, or {@code null} when {@code o} is {@code null}
+     */
+    public static String toStringV2(Object o) {
+        return o == null ? null : o.toString();
+    }
+
+    /** Manual smoke run with three sample rows sharing one id. */
+    public static void main(String[] args) {
+        get_entity udf = new get_entity();
+        String json = "[{\"name\":\"冯金元111\",\"id\":\"pbc6c4f933e85850d1c401509877671b0\",\"type\":1,\"deleted\":9},{\"name\":\"冯金元\",\"id\":\"pbc6c4f933e85850d1c401509877671b0\",\"type\":1,\"deleted\":1},{\"name\":\"冯金元2\",\"id\":\"pbc6c4f933e85850d1c401509877671b0\",\"type\":1,\"deleted\":0}]";
+        List<String> pairs = udf.evaluate(json,"name","id");
+        System.out.println(pairs);
+    }
+
+}

+ 41 - 0
src/main/java/com/winhc/bigdata/udf/get_text_from_json.java

@@ -0,0 +1,41 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * UDF that extracts one field from every object of a JSON array and concatenates the values.
+ *
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ */
+public class get_text_from_json extends UDF {
+
+    /**
+     * @param json JSON array of objects
+     * @param name field to read from each object
+     * @return the non-blank field values, sorted in natural order and concatenated without a
+     *         separator; {@code ""} when the input is empty or cannot be processed
+     */
+    public String evaluate(String json, String name) {
+        if (StringUtils.isEmpty(json)) {
+            return "";
+        }
+        try {
+            List<String> texts = new ArrayList<>();
+            for (JSONObject item : JSON.parseArray(json, JSONObject.class)) {
+                String text = item.getString(name);
+                if (StringUtils.isNotBlank(text)) {
+                    texts.add(text);
+                }
+            }
+            texts.sort(Comparator.naturalOrder());
+            return String.join("", texts);
+        } catch (Exception e) {
+            return "";
+        }
+    }
+
+    /** Manual smoke run with a one-element and a two-element array. */
+    public static void main(String[] args) {
+        get_text_from_json udf = new get_text_from_json();
+        System.out.println(udf.evaluate("[{\"name\": \"广州瑞华健康医疗科技有限公司\", \"keyno\": \"a9bd4f420b5e83d56aa0a8a3d650b587\"}]", "name"));
+        System.out.println(udf.evaluate("[{\"name\": \"南平市烟草公司邵武分公司\", \"keyno\": \"d00a778bd7d66bdbbd60b7c6c19c1c5a\"}, {\"name\": \"南平市烟草公司光泽分公司\", \"keyno\": \"4ae275469d74618b2c776cadf93c4ae6\"}]","name"));
+    }
+}
+
+

+ 53 - 0
src/main/java/com/winhc/bigdata/udf/processMoney.java

@@ -0,0 +1,53 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.bean.EntityInfo;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static com.winhc.bigdata.utils.BigDecimalUtil.div;
+
+/**
+ * UDF that strips non-numeric characters from an amount text and divides it by a number.
+ *
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ */
+public class processMoney extends UDF {
+
+    /**
+     * Keeps only digits and dots of {@code amt}, then divides the result by {@code num}
+     * through {@link #amt_div(String, String, Integer)} with a length argument of 6.
+     *
+     * @param amt raw amount text
+     * @param num divisor
+     * @return the quotient, or {@code 0} when the input is blank, the cleaned text is empty,
+     *         starts or ends with a dot, splits into more than two dot-separated parts, or
+     *         the division fails
+     */
+    public Double evaluate(String amt, Integer num) {
+        if (StringUtils.isBlank(amt)) {
+            return 0D;
+        }
+        String digits = amt.replaceAll("[^\\d.]", "");
+        boolean malformed = digits.equals("")
+                || digits.startsWith(".")
+                || digits.endsWith(".")
+                || digits.split("\\.").length > 2;
+        if (malformed) {
+            return 0D;
+        }
+        try {
+            double quotient = amt_div(digits, num.toString(), 6);
+            return quotient;
+        } catch (Exception e) {
+            return 0D;
+        }
+    }
+
+    /**
+     * Divides two numeric texts via the statically imported {@code div} helper.
+     *
+     * @param amt1   dividend text
+     * @param amt2   divisor text
+     * @param length passed through to {@code div}; presumably the result scale - confirm
+     * @return the value returned by {@code div}, or {@code 0} when an argument is blank or
+     *         {@code div} throws
+     */
+    public static Double amt_div(String amt1, String amt2, Integer length) {
+        if (StringUtils.isBlank(amt1) || StringUtils.isBlank(amt2)) {
+            return 0d;
+        }
+        try {
+            return div(amt1, amt2, length);
+        } catch (Exception e) {
+            return 0d;
+        }
+    }
+
+    /** Manual smoke run with a sample amount and divisor. */
+    public static void main(String[] args) {
+        processMoney udf = new processMoney();
+        System.out.println(udf.evaluate("398646125",10000));
+    }
+}
+
+

+ 4 - 2
src/main/java/com/winhc/bigdata/udf/split_names.java

@@ -45,8 +45,10 @@ public class split_names extends UDF {
 
     public static void main(String[] args) {
         //String jsons = "[{\"name\":\"张海林\",\"litigant_id\":\"\"},{\"name\":\"招商银行股份有限公司信用卡中心\",\"litigant_id\":\"cc45eeb0634f73531ba54ad55ba152a6\"}]&[{\"name\":\"上海浦东发展银行静安支行\",\"litigant_id\":\"c2c57fcc6398cfd49393c1a2d3c35a4c\"}]";
-        String jsons = "[{\"name\":\"南京颍州电气设备有限公司\",\"litigant_id\":\"\",\"date\":\"2010-06-22T00:00:00.000+08:00\",\"exec_money\":3.692949}]";
-        String json_path = "$.exec_money";
+        //String jsons = "[{\"name\":\"南京颍州电气设备有限公司\",\"litigant_id\":\"\",\"date\":\"2010-06-22T00:00:00.000+08:00\",\"exec_money\":3.692949}]";
+        //String json_path = "$.exec_money";
+        String jsons = "[\"肖毅\",\"赵磊\"]";
+        String json_path = "$[*]";
         split_names n = new split_names();
         System.out.println(n.evaluate(jsons, json_path));
     }

+ 33 - 0
src/main/java/com/winhc/bigdata/udf/trans_type.java

@@ -0,0 +1,33 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * UDF that normalises the "type" field by keeping the second space-separated token.
+ *
+ * @Author: π
+ * @Date: 2020/5/14 16:26
+ */
+public class trans_type extends UDF {
+
+    /**
+     * @param type raw type text
+     * @return {@code null} for blank input; the input itself when it contains no space;
+     *         otherwise the second element of {@code type.split(" ")}, or {@code null} when
+     *         the split yields fewer than two elements
+     */
+    public String evaluate(String type) {
+        if (StringUtils.isBlank(type)) {
+            return null;
+        }
+        if (!type.contains(" ")) {
+            return type;
+        }
+        String[] parts = type.split(" ");
+        return parts.length >= 2 ? parts[1] : null;
+    }
+
+    /** Manual smoke run with three sample inputs. */
+    public static void main(String[] args) {
+        trans_type udf = new trans_type();
+        System.out.println(udf.evaluate("F 美术 xx"));
+        System.out.println(udf.evaluate(" 美术"));
+        System.out.println(udf.evaluate("  "));
+    }
+}
+
+