Bladeren bron

Merge branch 'master' of http://139.224.213.4:3000/bigdata/UDF_Max

JimZhang 2 jaren geleden
bovenliggende
commit
666ccefcb9

+ 6 - 0
pom.xml

@@ -81,6 +81,12 @@
             <version>1.1.8</version>
         </dependency>
 
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.7</version>
+        </dependency>
+
     </dependencies>
 
     <build>

+ 26 - 0
src/main/java/com/winhc/bigdata/udf/ContainOtherChar.java

@@ -0,0 +1,26 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * ODPS UDF that reports whether a string contains any character other than those in
+ * the range U+4E00..U+9FA5 or an ASCII comma.
+ *
+ * @Author: π
+ */
+public class ContainOtherChar extends UDF {
+    /** Characters that do not count as "other": U+4E00..U+9FA5 and the ASCII comma. */
+    private static final Pattern ALLOWED_CHARS = Pattern.compile("[\\u4e00-\\u9fa5,]");
+
+    /**
+     * @param val text to inspect
+     * @return {@code false} when {@code StringUtils.isBlank(val)} holds; otherwise
+     *         {@code true} if anything is left after every allowed character is removed
+     */
+    public Boolean evaluate(String val) {
+        if (StringUtils.isBlank(val)) {
+            return false;
+        }
+        String leftover = ALLOWED_CHARS.matcher(val).replaceAll("");
+        return !leftover.isEmpty();
+    }
+
+    /** Manual smoke check: prints the result for two sample strings. */
+    public static void main(String[] args) {
+        ContainOtherChar udf = new ContainOtherChar();
+        for (String sample : new String[]{"中国,人123,za.早上5", "高管"}) {
+            System.out.println(udf.evaluate(sample));
+        }
+    }
+}

+ 39 - 0
src/main/java/com/winhc/bigdata/udf/StringToMillis.java

@@ -0,0 +1,39 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * @Author: π
+ * @Date: 2022/6/14 11:13
+ * @Description:
+ */
+public class StringToMillis extends UDF {
+
+    public String evaluate(String date) {
+        if (StringUtils.isEmpty(date)) {
+            return null;
+        }
+        String p = "yyyy-MM-dd HH:mm:ss";
+        if (date.length() == 10) {
+            p = "yyyy-MM-dd";
+        }
+        SimpleDateFormat fm = new SimpleDateFormat(p);
+        try {
+            return String.valueOf(fm.parse(date).getTime());
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    public static void main(String[] args) {
+        String s = "2022-06-14 00:00:00";
+        StringToMillis r = new StringToMillis();
+        System.out.println(r.evaluate(s));
+    }
+}

+ 26 - 0
src/main/java/com/winhc/bigdata/udf/TrimLable.java

@@ -0,0 +1,26 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+import org.apache.commons.lang3.StringEscapeUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * ODPS UDF that strips tag-like markup from text, unescapes HTML entities and then
+ * removes all whitespace.
+ *
+ * @Author: π
+ */
+public class TrimLable extends UDF {
+    /**
+     * Matches a tag-like run: an optional {@code <}, one or more characters that are
+     * neither {@code <} nor {@code >}, then a closing {@code >}.
+     * NOTE(review): because the leading {@code <} is optional, text in front of a stray
+     * {@code >} (for example the {@code "a "} in {@code "a > b"}) is removed as well —
+     * confirm this is intended before tightening the pattern.
+     */
+    public static final Pattern compile = Pattern.compile("<?[^<>]+>");
+
+    /**
+     * Whitespace runs to delete. Compiled once instead of on every call; {@code \s+}
+     * yields the same output as the former {@code \s*} without matching empty strings.
+     */
+    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
+
+    /**
+     * @param text raw text that may contain markup
+     * @return {@code text} unchanged when {@code StringUtils.isBlank(text)} holds;
+     *         otherwise the text with tag-like runs removed, HTML4 entities unescaped
+     *         and every {@code \s} character deleted
+     */
+    public String evaluate(String text) {
+        if (StringUtils.isBlank(text)) {
+            return text;
+        }
+        String withoutTags = compile.matcher(text).replaceAll("");
+        String unescaped = StringEscapeUtils.unescapeHtml4(withoutTags);
+        return WHITESPACE.matcher(unescaped).replaceAll("");
+    }
+
+    /** Manual smoke check: prints the cleaned form of two sample strings. */
+    public static void main(String[] args) {
+        TrimLable res = new TrimLable();
+        System.out.println(res.evaluate("<xx> 张 三 &nb sp"));
+        System.out.println(res.evaluate("< 高 管xx  yyxx  ><>"));
+    }
+}

+ 18 - 0
src/main/java/com/winhc/bigdata/udf/company/CreditCodeVerify.java

@@ -0,0 +1,18 @@
+package com.winhc.bigdata.udf.company;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+/**
+ * ODPS UDF performing a shallow format check on a credit code.
+ *
+ * @author: XuJiakai
+ * 2022/7/26 17:34
+ */
+public class CreditCodeVerify extends UDF {
+    /**
+     * @param val candidate credit code
+     * @return {@code true} (valid) when the value is not blank, is exactly 18 characters
+     *         long and starts with {@code '9'}
+     */
+    public boolean evaluate(String val) {
+        if (!StringUtils.isNotBlank(val)) {
+            return false;
+        }
+        if (val.length() != 18) {
+            return false;
+        }
+        return val.charAt(0) == '9';
+    }
+}

+ 28 - 0
src/main/java/com/winhc/bigdata/udf/company/RegNumberVerify.java

@@ -0,0 +1,28 @@
+package com.winhc.bigdata.udf.company;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+/**
+ * @author: XuJiakai
+ * 2022/7/26 17:36
+ */
+public class RegNumberVerify extends UDF {
+
+    private static boolean isNumber(String val) {
+        try {
+            Double.parseDouble(val);
+            return true;
+        } catch (Exception e) {
+            return false;
+        }
+    }
+
+    /**
+     * @param val
+     * @return true 合法
+     */
+    public boolean evaluate(String val) {
+        return StringUtils.isNotBlank(val) && val.length() == 15 && isNumber(val.substring(0, 6));
+    }
+}

File diff suppressed because it is too large
+ 78 - 0
src/main/java/com/winhc/bigdata/udf/get_phone.java


+ 53 - 0
src/main/java/com/winhc/bigdata/udf/trans_tel_info.java

@@ -0,0 +1,53 @@
+package com.winhc.bigdata.udf;
+
+import com.alibaba.fastjson.JSON;
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * ODPS UDF that normalises the {@code public_time} field of every object in a JSON array
+ * and removes duplicate objects.
+ *
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class trans_tel_info extends UDF {
+
+    /**
+     * @param json JSON array of objects
+     * @return the re-serialised array with {@code public_time} normalised and duplicates
+     *         dropped; {@code null} when the input is blank, equals {@code "[]"}, yields
+     *         no elements, or any exception is raised while processing
+     */
+    public String evaluate(String json) {
+        if (StringUtils.isBlank(json) || "[]".equalsIgnoreCase(json)) {
+            return null;
+        }
+        try {
+            List<Map> parsed = JSON.parseArray(json, Map.class);
+            List<Map> normalized = parsed.stream()
+                    .map(trans_tel_info::normalizePublicTime)
+                    .distinct()
+                    .collect(Collectors.toList());
+            if (normalized.isEmpty()) {
+                return null;
+            }
+            return JSON.toJSONString(normalized);
+        } catch (Exception e) {
+            // Best effort: a malformed row is printed to stderr and mapped to null.
+            e.printStackTrace();
+            return null;
+        }
+
+    }
+
+    /**
+     * Rewrites {@code public_time} in place: an 18-character value gets a space inserted
+     * after its first 10 characters (e.g. {@code 2020-01-0100:00:00} becomes
+     * {@code 2020-01-01 00:00:00}). The key is always written back, as {@code null} when
+     * it was absent.
+     */
+    private static Map normalizePublicTime(Map entry) {
+        String publicTime = toStringV2(entry.get("public_time"));
+        if (StringUtils.isNotBlank(publicTime) && publicTime.length() == 18) {
+            publicTime = publicTime.substring(0, 10) + " " + publicTime.substring(10);
+        }
+        entry.put("public_time", publicTime);
+        return entry;
+    }
+
+    /**
+     * Null-safe {@code toString}.
+     *
+     * @param o any object, may be {@code null}
+     * @return {@code o.toString()}, or {@code null} when {@code o} is {@code null}
+     */
+    public static String toStringV2(Object o) {
+        if (null == o) return null;
+        return o.toString();
+    }
+
+    /** Manual smoke check: prints the normalised form of a sample array. */
+    public static void main(String[] args) {
+        trans_tel_info j = new trans_tel_info();
+        String json = "[{\"deleted\":\"0\",\"is_phone\":2,\"public_time\":\"2020-01-0100:00:00\",\"source\":\"2020年报\",\"tel\":\"021-32278500\"},{\"deleted\":\"0\",\"is_phone\":2,\"public_time\":\"2014-01-0100:00:00\",\"source\":\"2014年报\",\"tel\":\"021-51118422\"},{\"deleted\":\"0\",\"is_phone\":2,\"public_time\":\"2018-01-0100:00:00\",\"source\":\"2018年报\",\"tel\":\"021-51118500\"}]";
+        String evaluate = j.evaluate(json);
+        System.out.println(evaluate);
+    }
+
+}

+ 44 - 0
src/main/java/com/winhc/bigdata/udf/verify_case_no.java

@@ -0,0 +1,44 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+/**
+ * ODPS UDF that validates the overall shape of a court case number.
+ *
+ * @Author: π
+ */
+public class verify_case_no extends UDF {
+
+    /**
+     * Expected shape: an opening parenthesis (ASCII or full-width U+FF08), a four-digit
+     * year (capture group 1), a closing parenthesis (ASCII or full-width U+FF09), one of
+     * the listed region/court prefix characters, then any text that contains at least
+     * one character in U+4E00..U+9FA5 followed somewhere later by "号".
+     * The {@code \d{0,7}} part may match zero digits, so it does not constrain the input.
+     */
+    private final static Pattern pattern = Pattern.compile("^[(\\uFF08](\\d{4})[)\\uFF09][黑吉辽冀甘青陕豫鲁晋皖鄂湘苏川黔云浙赣粤闽台琼新蒙宁桂藏京沪津渝港澳内军兵最].*[\\u4e00-\\u9fa5]{1,3}.*\\d{0,7}.*号.*$");
+
+    /**
+     * @param case_no candidate case number
+     * @return {@code false} when {@code StringUtils.isBlank(case_no)} holds; otherwise
+     *         whether the whole value matches the case-number pattern
+     */
+    public Boolean evaluate(String case_no) {
+        if (StringUtils.isBlank(case_no)) {
+            return false;
+        }
+        return pattern.matcher(case_no).matches();
+    }
+
+
+    /** Manual smoke check: prints the verdict for a handful of sample case numbers. */
+    public static void main(String[] args) {
+        verify_case_no g = new verify_case_no();
+        Stream.of("(2021)云0325民初号","(2021)辽1221民初298-2号","(2021)鲁1723刑初号","(2021)云25民初461号","(2016)最高法行申3420号", "(2015)最高法行申3420号", "(2018)最高法行申号", "(2020)京0491民初28109号"
+                , "(2017)苏0684民初2250号", "(2012)甬余陆商初字第00001号", "(2013)芜民一初字第01203号")
+                .forEach(x -> System.out.println(x + " =====> " + g.evaluate(x)));
+
+    }
+
+}
+
+