浏览代码

feat: add CompanyZxrRestrictPublishDate

许家凯 3 年之前
父节点
当前提交
cc0de7ac9b
共有 1 个文件被更改，包括 37 次插入和 0 次删除
  1. 37 0
      src/main/java/com/winhc/bigdata/udf/etl/CompanyZxrRestrictPublishDate.java

+ 37 - 0
src/main/java/com/winhc/bigdata/udf/etl/CompanyZxrRestrictPublishDate.java

@@ -0,0 +1,37 @@
+package com.winhc.bigdata.udf.etl;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * ODPS UDF that extracts the {@code yyyy-MM-dd} directory segment preceding a PDF file name in a URL.
+ *
+ * @author XuJiakai (2021/9/13 13:56)
+ */
+public class CompanyZxrRestrictPublishDate extends UDF {
+    /** Any prefix, a date not glued to a preceding digit (group {@code time}), then {@code /name.pdf}. */
+    private static final Pattern DATED_PDF_URL = Pattern.compile(".*(?<![0-9])(?<time>\\d{4}-\\d{2}-\\d{2})/[a-zA-Z0-9\\-]+\\.pdf$");
+
+    /**
+     * Returns the date segment found in {@code content}; the whole string has to match the pattern.
+     *
+     * @param content URL-like text such as {@code http://host/dir/2021-06-29/abc.pdf}; may be {@code null}
+     * @return the matched date text (not calendar-validated), or {@code null} for null, empty or non-matching input
+     */
+    public String evaluate(String content) {
+        if (StringUtils.isEmpty(content)) {
+            return null;
+        }
+        Matcher matcher = DATED_PDF_URL.matcher(content);
+        return matcher.matches() ? matcher.group("time") : null;
+    }
+
+    /** Manual smoke check: prints the date extracted from one sample URL. */
+    public static void main(String[] args) {
+        String url = "http://zxgk.court.gov.cn/xglfile/2021-06-29/8276d2164b2a4f229c31735a09036439.pdf";
+        System.out.println(new CompanyZxrRestrictPublishDate().evaluate(url));
+    }
+}