|
@@ -0,0 +1,37 @@
|
|
|
+package com.winhc.bigdata.udf.etl;
|
|
|
+
|
|
|
+import com.aliyun.odps.udf.UDF;
|
|
|
+import com.aliyun.odps.utils.StringUtils;
|
|
|
+
|
|
|
+import java.util.regex.Matcher;
|
|
|
+import java.util.regex.Pattern;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @author: XuJiakai
|
|
|
+ * 2021/9/13 13:56
|
|
|
+ */
|
|
|
+public class CompanyZxrRestrictPublishDate extends UDF {
|
|
|
+// private final static Pattern pattern = Pattern.compile("^http.+/(20\\d{2}-(0\\d)|(1[0-2])-\\d{2})/.+\\.pdf$");
|
|
|
+// private final static Pattern pattern = Pattern.compile("^http://zxgk\\.court\\.gov\\.cn/[a-z0-9A-Z]+(/\\d+)?/(?<time>\\d{4}-\\d{2}-\\d{2})/.+\\.pdf$");
|
|
|
+ private final static Pattern pattern = Pattern.compile(".*(?<time>\\d{4}-\\d{2}-\\d{2})/[a-zA-Z0-9\\-]+\\.pdf$");
|
|
|
+
|
|
|
+ public String evaluate(String content) {
|
|
|
+ if (StringUtils.isEmpty(content)) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ Matcher matcher = pattern.matcher(content);
|
|
|
+ if (matcher.matches()) {
|
|
|
+ return matcher.group("time");
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void main(String[] args) {
|
|
|
+ CompanyZxrRestrictPublishDate companyZxrRestrictPublishDate = new CompanyZxrRestrictPublishDate();
|
|
|
+
|
|
|
+ String date = "http://zxgk.court.gov.cn/xglfile/2021-06-29/8276d2164b2a4f229c31735a09036439.pdf";
|
|
|
+ date = companyZxrRestrictPublishDate.evaluate(date);
|
|
|
+
|
|
|
+ System.out.println(date);
|
|
|
+ }
|
|
|
+}
|