xufei 2 years ago
parent
commit
f4016e0cce

+ 6 - 0
pom.xml

@@ -81,6 +81,12 @@
             <version>1.1.8</version>
         </dependency>
 
+        <!-- commons-lang3 supplies StringEscapeUtils, which the TrimLable UDF uses to unescape HTML entities. -->
+        <!-- NOTE(review): StringEscapeUtils is marked deprecated in recent commons-lang3 releases in favour of commons-text; confirm before upgrading. -->
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.7</version>
+        </dependency>
+
     </dependencies>
 
     <build>

+ 26 - 0
src/main/java/com/winhc/bigdata/udf/ContainOtherChar.java

@@ -0,0 +1,26 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Description: 包含字符
+ */
+public class ContainOtherChar extends UDF {
+    private static final Pattern pattern = Pattern.compile("[\\u4e00-\\u9fa5,]");
+
+    public Boolean evaluate(String val) {
+        if(StringUtils.isBlank(val)) return false;
+        String s1 = pattern.matcher(val).replaceAll("");
+        return s1.length() > 0;
+    }
+
+    public static void main(String[] args) {
+        ContainOtherChar res = new ContainOtherChar();
+        System.out.println(res.evaluate("中国,人123,za.早上5"));
+        System.out.println(res.evaluate("高管"));
+    }
+}

+ 26 - 0
src/main/java/com/winhc/bigdata/udf/TrimLable.java

@@ -0,0 +1,26 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+import org.apache.commons.lang3.StringEscapeUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * ODPS UDF that removes tag-like fragments from a string, unescapes HTML 4
+ * entities in what is left, and finally deletes all whitespace.
+ *
+ * @author π
+ */
+public class TrimLable extends UDF {
+    /**
+     * Tag-like fragment: an optional {@code <}, one or more characters that are
+     * neither {@code <} nor {@code >}, then a closing {@code >}.
+     * NOTE(review): because the leading {@code <} is optional, plain text that
+     * precedes a bare {@code >} is removed as well (e.g. "a>b" becomes "b");
+     * confirm this is intended before tightening the pattern.
+     */
+    public static final Pattern compile = Pattern.compile("<?[^<>]+>");
+
+    /**
+     * Precompiled whitespace matcher. The UDF runs once per row, so compiling the
+     * regex on every call (as String.replaceAll does) is wasted work.
+     * NOTE(review): {@code \s} does not match U+00A0, so a no-break space
+     * produced by unescaping {@code &nbsp;} is kept; confirm whether it should
+     * be stripped too.
+     */
+    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
+
+    /**
+     * Cleans {@code text}.
+     *
+     * @param text raw text; may be {@code null}
+     * @return {@code text} unchanged when {@code StringUtils.isBlank(text)} holds;
+     *         otherwise the text with tag-like fragments removed, HTML entities
+     *         unescaped and whitespace deleted
+     */
+    public String evaluate(String text) {
+        if (StringUtils.isBlank(text)) {
+            return text;
+        }
+        // Tags are stripped before unescaping, so escaped markup such as
+        // "&lt;b&gt;" is decoded to "<b>" and left in the result.
+        String withoutTags = compile.matcher(text).replaceAll("");
+        String unescaped = StringEscapeUtils.unescapeHtml4(withoutTags);
+        return WHITESPACE.matcher(unescaped).replaceAll("");
+    }
+
+    /** Manual smoke test: prints the cleaned form of two sample strings. */
+    public static void main(String[] args) {
+        TrimLable res = new TrimLable();
+        System.out.println(res.evaluate("<xx> 张 三 &nb sp"));
+        System.out.println(res.evaluate("< 高 管xx  yyxx  ><>"));
+    }
+}

+ 53 - 0
src/main/java/com/winhc/bigdata/udf/trans_tel_info.java

@@ -0,0 +1,53 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * ODPS UDF that takes a JSON array of objects, repairs the {@code public_time}
+ * value of each object when it lacks the separator between date and time, and
+ * returns the de-duplicated array as JSON.
+ *
+ * Created 2021/8/30 16:57.
+ *
+ * @author π
+ */
+public class trans_tel_info extends UDF {
+    /** Key of the timestamp field that gets normalized. */
+    private static final String PUBLIC_TIME = "public_time";
+    /** Length of a timestamp with no date/time separator, e.g. "2020-01-0100:00:00". */
+    private static final int COMPACT_TIMESTAMP_LENGTH = 18;
+    /** Length of the leading "yyyy-MM-dd" part. */
+    private static final int DATE_LENGTH = 10;
+
+    /**
+     * Normalizes every element of the given JSON array.
+     *
+     * @param json JSON array text; may be {@code null}
+     * @return the normalized, de-duplicated array as a JSON string, or
+     *         {@code null} when the input is blank, is exactly {@code "[]"},
+     *         yields no elements, or cannot be processed
+     */
+    public String evaluate(String json) {
+        if (StringUtils.isBlank(json) || "[]".equals(json)) {
+            return null;
+        }
+        try {
+            List<Map> normalized = JSON.parseArray(json, Map.class).stream()
+                    .map(trans_tel_info::normalizePublicTime)
+                    .distinct()
+                    .collect(Collectors.toList());
+            return normalized.isEmpty() ? null : JSON.toJSONString(normalized);
+        } catch (Exception e) {
+            // Deliberately best-effort: a malformed row yields null rather than
+            // failing the whole job. The trace is still printed for diagnosis.
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Rewrites the entry's {@code public_time} in place: an 18-character value
+     * gets a space inserted after the first 10 characters; any other value is
+     * stored back as its string form ({@code null} when absent).
+     */
+    @SuppressWarnings({"rawtypes", "unchecked"})
+    private static Map normalizePublicTime(Map entry) {
+        String publicTime = toStringV2(entry.get(PUBLIC_TIME));
+        // Uses the StringUtils this file already imports rather than the
+        // commons-lang 2.x class that was referenced by its fully-qualified name.
+        if (!StringUtils.isBlank(publicTime) && publicTime.length() == COMPACT_TIMESTAMP_LENGTH) {
+            publicTime = publicTime.substring(0, DATE_LENGTH) + " " + publicTime.substring(DATE_LENGTH);
+        }
+        entry.put(PUBLIC_TIME, publicTime);
+        return entry;
+    }
+
+    /**
+     * Null-safe {@code toString}.
+     *
+     * @param o any object; may be {@code null}
+     * @return {@code o.toString()}, or {@code null} when {@code o} is {@code null}
+     */
+    public static String toStringV2(Object o) {
+        return o == null ? null : o.toString();
+    }
+
+    /** Manual smoke test: prints the normalized form of a sample array. */
+    public static void main(String[] args) {
+        trans_tel_info j = new trans_tel_info();
+        String json = "[{\"deleted\":\"0\",\"is_phone\":2,\"public_time\":\"2020-01-0100:00:00\",\"source\":\"2020年报\",\"tel\":\"021-32278500\"},{\"deleted\":\"0\",\"is_phone\":2,\"public_time\":\"2014-01-0100:00:00\",\"source\":\"2014年报\",\"tel\":\"021-51118422\"},{\"deleted\":\"0\",\"is_phone\":2,\"public_time\":\"2018-01-0100:00:00\",\"source\":\"2018年报\",\"tel\":\"021-51118500\"}]";
+        System.out.println(j.evaluate(json));
+    }
+
+}

+ 44 - 0
src/main/java/com/winhc/bigdata/udf/verify_case_no.java

@@ -0,0 +1,44 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+/**
+ * @Author: π
+ * @Description: 校验案号
+ */
+public class verify_case_no extends UDF {
+
+    private final static Pattern pattern = Pattern.compile("^[((](\\d{4})[))][黑吉辽冀甘青陕豫鲁晋皖鄂湘苏川黔云浙赣粤闽台琼新蒙宁桂藏京沪津渝港澳内军兵最].*[\\u4e00-\\u9fa5]{1,3}.*\\d{0,7}.*号.*$");
+
+    public Boolean evaluate(String case_no) {
+        if (StringUtils.isBlank(case_no)) return false;
+        Matcher matcher = pattern.matcher(case_no);
+        if (matcher.matches()) {
+//            String year = matcher.group(1);
+//            return Integer.parseInt(year) > 2015;
+            return true;
+        }
+        return false;
+    }
+
+
+    public static void main(String[] args) {
+        Stream.of("(2021)云0325民初号","(2021)辽1221民初298-2号","(2021)鲁1723刑初号","(2021)云25民初461号","(2016)最高法行申3420号", "(2015)最高法行申3420号", "(2018)最高法行申号", "(2020)京0491民初28109号"
+                , "(2017)苏0684民初2250号", "(2012)甬余陆商初字第00001号", "(2013)芜民一初字第01203号")
+                .forEach(x -> {
+                    verify_case_no g = new verify_case_no();
+                    System.out.println(x + " =====> " + g.evaluate(x));
+                });
+
+    }
+
+}
+
+