xufei пре 5 месеци
родитељ
комит
e410314db3

+ 169 - 0
src/main/java/com/winhc/bigdata/bean/BaseFieldMeta.java

@@ -0,0 +1,169 @@
+package com.winhc.bigdata.bean;
+
+
+
+import java.sql.Timestamp;
+
+/**
+ * Mutable bean describing the metadata of a single field.
+ *
+ * <p>It only stores values: an id ({@code fid}), a {@code tn} value, plain and {@code _cn} names,
+ * a description, three type strings ({@code odps_type}, {@code database_type}, {@code schema_type}),
+ * three Boolean flags, two Integer length values, create/update timestamps and an Integer
+ * {@code deleted} marker. No validation or defaulting is performed, so every property is
+ * {@code null} until its setter is called.
+ *
+ * <p>Field and accessor names deliberately keep the snake_case spelling (for example
+ * {@code getName_cn()}); presumably they mirror column names of a backing table — confirm before
+ * renaming anything.
+ */
+public class BaseFieldMeta {
+
+    private String fid;
+
+    private String tn; // presumably the name of the table the field belongs to — confirm
+
+    private String name;
+
+    private String name_cn;
+
+    private String description;
+
+    private String odps_type;
+
+    private String database_type;
+
+    private String schema_type;
+
+    private Boolean required;
+
+    private Boolean visible_to_customers;
+
+    private Boolean not_empty;
+
+    private Integer max_length;
+
+    private Integer limit_length;
+
+    private Timestamp create_time;
+
+    private Timestamp update_time;
+
+    private Integer deleted; // Integer rather than Boolean; the value encoding is not defined in this class
+
+    public String getFid() {
+        return fid;
+    }
+
+    public void setFid(String fid) {
+        this.fid = fid;
+    }
+
+    public String getTn() {
+        return tn;
+    }
+
+    public void setTn(String tn) {
+        this.tn = tn;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getName_cn() {
+        return name_cn;
+    }
+
+    public void setName_cn(String name_cn) {
+        this.name_cn = name_cn;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+
+    public void setDescription(String description) {
+        this.description = description;
+    }
+
+    public String getOdps_type() {
+        return odps_type;
+    }
+
+    public void setOdps_type(String odps_type) {
+        this.odps_type = odps_type;
+    }
+
+    public String getDatabase_type() {
+        return database_type;
+    }
+
+    public void setDatabase_type(String database_type) {
+        this.database_type = database_type;
+    }
+
+    public String getSchema_type() {
+        return schema_type;
+    }
+
+    public void setSchema_type(String schema_type) {
+        this.schema_type = schema_type;
+    }
+
+    public Boolean getRequired() {
+        return required;
+    }
+
+    public void setRequired(Boolean required) {
+        this.required = required;
+    }
+
+    public Boolean getVisible_to_customers() {
+        return visible_to_customers;
+    }
+
+    public void setVisible_to_customers(Boolean visible_to_customers) {
+        this.visible_to_customers = visible_to_customers;
+    }
+
+    public Boolean getNot_empty() {
+        return not_empty;
+    }
+
+    public void setNot_empty(Boolean not_empty) {
+        this.not_empty = not_empty;
+    }
+
+    public Integer getMax_length() {
+        return max_length;
+    }
+
+    public void setMax_length(Integer max_length) {
+        this.max_length = max_length;
+    }
+
+    public Integer getLimit_length() {
+        return limit_length;
+    }
+
+    public void setLimit_length(Integer limit_length) {
+        this.limit_length = limit_length;
+    }
+
+    public Timestamp getCreate_time() {
+        return create_time;
+    }
+
+    public void setCreate_time(Timestamp create_time) {
+        this.create_time = create_time;
+    }
+
+    public Timestamp getUpdate_time() {
+        return update_time;
+    }
+
+    public void setUpdate_time(Timestamp update_time) {
+        this.update_time = update_time;
+    }
+
+    public Integer getDeleted() {
+        return deleted;
+    }
+
+    public void setDeleted(Integer deleted) {
+        this.deleted = deleted;
+    }
+}

+ 135 - 0
src/main/java/com/winhc/bigdata/bean/DynamicEnum.java

@@ -0,0 +1,135 @@
+package com.winhc.bigdata.bean;
+
+/**
+ * Dynamic dictionary.
+ *
+ * <p>Each constant bundles six strings, in constructor order: {@code tn}, {@code dynamicCode},
+ * {@code number}, {@code nameEn} (empty for some constants), {@code name} and {@code fatherName}.
+ * Several constants share the same {@code tn} (for example {@code "company_change"} and
+ * {@code "company"}), so {@code tn} alone does not identify a constant.
+ *
+ * <p>NOTE(review): the fields are not final and public setters exist, so calling a setter changes
+ * the shared constant for every user of the enum in the same JVM.
+ *
+ * <p>NOTE(review): {@code number} "40" and "44" each appear on two constants and "41" is unused —
+ * confirm this is intended.
+ */
+public enum DynamicEnum {
+    经营异常_新增("company_abnormal_info", "301901", "1", "JingYingYiChangDet", "经营异常", "经营风险"),
+    经营异常_移除("company_abnormal_info", "301902", "1", "JingYingYiChangDet", "经营异常", "经营风险"),
+    法院公告("company_court_announcement","302001", "2", "FaYuanGongGaoDet", "法院公告", "司法风险"),
+    开庭公告("company_court_open_announcement","302101", "3", "KaiTingGongGaoDet", "开庭公告", "司法风险"),
+    立案信息("company_court_register","302201", "4", "LiAnXinXiDet", "立案信息", "司法风险"),
+    环保处罚("company_env_punishment","302401", "5", "HuanBaoChuFaDet", "环保处罚", "经营风险"),
+    严重违法("company_illegal_info","302501", "6", "YanZhongWeiFaXingWeiDet", "严重违法", "经营风险"),
+    行政处罚_信用中国("company_punishment_info_creditchina","302601", "7", "XingZhengChuFaDet", "行政处罚", "经营风险"),
+    送达公告("company_send_announcement","302701", "8", "SongDaGongGaoDet", "送达公告", "司法风险"),
+    税收违法("company_tax_contravention","302801", "9", "ShuiShouWeiFaDet", "税收违法", "经营风险"),
+    司法拍卖("auction_tracking","302901", "10", "PaiMaiXiangQing", "司法拍卖", "司法风险"),
+    欠税公告("company_own_tax","303001", "11", "QianShuiGongGaoDet", "欠税公告", "经营风险"),
+    询价评估结果("zxr_evaluate_results","304501", "12", "XunJiaPingGuDet", "询价评估结果", "经营风险"),
+
+    破产信息("bankruptcy_open_case","303101", "13", "PoChanChongZhengDet", "破产信息", "司法风险"),
+    简易注销("company_brief_cancel_announcement","303201", "14", "JianYiZhuXiao", "简易注销", "经营风险"),
+    失信被执行人("company_dishonest_info","303301", "15", "ShiXinQingKuangDet", "失信被执行人", "司法风险"),
+    公示催告("company_public_announcement","303401", "16", "GongShiCuiGaoDet", "公示催告", "经营风险"),
+    被执行人("company_zxr","303501", "17", "BeiZhiXingRenDet", "被执行人", "司法风险"),
+    终本案件("company_zxr_final_case","303601", "18", "ZhongBenAnJianDet", "终本案件", "司法风险"),
+    限制高消费("company_zxr_restrict","303701", "19", "JYXianZhiGaoXiaoFeiDet", "限制高消费", "司法风险"),
+    限制出境("restrictions_on_exit","303801", "20", "XianZhiChuJingDet", "限制出境", "司法风险"),
+    股权出质("company_equity_info","303901", "21", "GuQuanChuZhiDet", "股权出质", "经营风险"),
+    股权质押("company_equity_pledge_holder","304001", "22", "GuQuanZhiYaDet", "股权质押", "经营风险"),
+    股权冻结("company_judicial_assistance","304101", "23", "GuQuanDongJieDet", "股权冻结", "司法风险"),
+    土地抵押("company_land_mortgage","304201", "24", "TuDiDiYaDet", "土地抵押", "经营风险"),
+    动产抵押("company_mortgage_info","304301", "25", "DongChanDiYaDet", "动产抵押", "经营风险"),
+    裁判文书("wenshu_detail_v2","304401", "26", "JYNewCaselibDet", "裁判文书", "司法风险"),
+
+    抽查检查("company_check_info","201101", "27", "ChouChaJianChaDet", "抽查检查", "经营信息"),
+    双随机抽查("company_double_random_check_info","201201", "28", "ShuangSuiJiChouChaDet", "双随机抽查", "经营信息"),
+    融资信息("company_finance","201301", "29", "RongZhiXinXi", "融资信息", "经营信息"),
+//    行政许可("company_license","201401", "30", "XingZhenXuKeDet", "行政许可", "经营信息"),
+    行政许可_信用中国("company_license_creditchina_new","201401", "30", "XingZhenXuKeDet", "行政许可", "经营信息"),
+    购地信息("company_land_announcement","201501", "31", "GouDiXinXiDet", "购地信息", "经营信息"),
+    地块公示("company_land_publicity","201601", "32", "TuDiXinXiDet", "地块公示", "经营信息"),
+    土地转让("company_land_transfer","201701", "33", "TuDiZhuanRangDet", "土地转让", "经营信息"),
+    知识产权("intellectual","201801", "34", "","知识产权", "经营信息"),
+
+
+    // Parent category "工商信息" (business registration information)
+    公司名称变更("company_change","101001", "35", "","公司名称变更", "工商信息"),
+    注册资本变更("company_change", "101002", "36", "","注册资本变更", "工商信息"),
+    注册地址变更("company_change", "101004", "37", "","注册地址变更", "工商信息"),
+    经营范围变更("company_change","101005", "38", "","经营范围变更", "工商信息"),
+    公司状态变更("company_change", "101006", "39", "","公司状态变更", "工商信息"),
+    法定代表人变更_公司("company", "101003", "40", "","法定代表人变更", "工商信息"),
+    法定代表人变更_公司_增量("company", "110101", "40", "","法定代表人变更", "工商信息"),
+
+    // Three codes merged into one: 109001, 109002, 109005 -> 109001
+    成员变更_人员("company", "109001", "44", "","成员变更", "工商信息"),
+
+    // Two codes merged into one: 101101, 101102 -> 101101
+    股东变更_公司("company_holder", "101101", "42", "GuDongBianGengDet","股东变更", "工商信息"),
+
+    // Two codes merged into one: 109003, 109004 -> 109003
+    股东变更_人员("company_holder", "109003", "43", "DuiWaiTouZiBianGeng","对外投资", "工商信息"),
+
+    主要成员变更_公司("company_staff", "101301", "44", "ZhuYaoChengYuanBianDet","主要成员变更", "工商信息"),
+
+    商标("company_tm", "404601", "51", "ShangBiaoDet", "商标信息", "知识产权"),
+    专利("company_patent_new", "404501", "52", "ZhuanLiDet", "专利信息", "知识产权"),
+    网站备案("company_icp", "404701", "53", "ZhuCeWangZhanDet", "网站备案", "知识产权"),
+    //默认("default", "0", "999", "","默认",""),
+    ;
+    private String tn; // presumably the source table name — confirm
+    private String dynamicCode; // six-digit code string; unique per constant in the table above
+    private String number; // numeric string; not unique (see the review note on the enum)
+    private String nameEn; // may be the empty string
+    private String name; // display name
+    private String fatherName; // parent category label
+
+    DynamicEnum(String tn, String dynamicCode, String number, String nameEn, String name, String fatherName) {
+        this.tn = tn;
+        this.dynamicCode = dynamicCode;
+        this.number = number;
+        this.nameEn = nameEn;
+        this.name = name;
+        this.fatherName = fatherName;
+    }
+
+    public String getTn() {
+        return tn;
+    }
+
+    public void setTn(String tn) {
+        this.tn = tn;
+    }
+
+    public String getDynamicCode() {
+        return dynamicCode;
+    }
+
+    public void setDynamicCode(String dynamicCode) {
+        this.dynamicCode = dynamicCode;
+    }
+
+    public String getNumber() {
+        return number;
+    }
+
+    public void setNumber(String number) {
+        this.number = number;
+    }
+
+    public String getNameEn() {
+        return nameEn;
+    }
+
+    public void setNameEn(String nameEn) {
+        this.nameEn = nameEn;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getFatherName() {
+        return fatherName;
+    }
+
+    public void setFatherName(String fatherName) {
+        this.fatherName = fatherName;
+    }
+}

+ 1 - 1
src/main/java/com/winhc/bigdata/udf/CleanupNotNumber.java

@@ -26,6 +26,6 @@ public class CleanupNotNumber extends UDF {
 
     public static void main(String[] args) {
         CleanupNotNumber j = new CleanupNotNumber();
-        System.out.println(j.evaluate("38000.00000000万元人民币元"));
+        System.out.println(j.evaluate("38000.00000100万元人民币元")); // manual check: prints evaluate's result for a sample amount string
     }
 }

Разлика између датотеке није приказан због своје велике величине
+ 117 - 0
src/main/java/com/winhc/bigdata/udf/DataToStrV2.java


+ 38 - 0
src/main/java/com/winhc/bigdata/udf/DynamicCodeTrans.java

@@ -0,0 +1,38 @@
+package com.winhc.bigdata.udf;
+
+import com.aliyun.odps.udf.UDF;
+import com.winhc.bigdata.bean.DynamicEnum;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * @author: π
+ * 2021/8/30 16:57
+ */
+public class DynamicCodeTrans extends UDF {
+
+
+    public String evaluate(String code) {
+        if (StringUtils.isBlank(code)) {
+            return null;
+        }
+        DynamicEnum[] vv = DynamicEnum.values();
+        Map<String, String> mm = Arrays.stream(vv).collect(Collectors.toMap(DynamicEnum::getTn, DynamicEnum::getFatherName, (k1, k2) -> k1));
+
+        return mm.getOrDefault(code, null);
+    }
+
+
+
+    public static void main(String[] args) {
+        DynamicCodeTrans j = new DynamicCodeTrans();
+        System.out.println(j.evaluate("company_holder"));
+        System.out.println(j.evaluate("company_finance"));
+
+    }
+
+}

+ 11 - 0
src/main/java/com/winhc/bigdata/udf/processMoney.java

@@ -33,6 +33,17 @@ public class processMoney extends UDF {
         return amtstr;
     }
 
+    public Double evaluate(String amt1, String amt2, Integer length) {
+        Double r1;
+        if (StringUtils.isBlank(amt1) || StringUtils.isBlank(amt2)) return 0d;
+        try {
+            r1 = div(amt1, amt2, length);
+        } catch (Exception e) {
+            r1 = 0d;
+        }
+        return r1;
+    }
+
     public static Double amt_div(String amt1, String amt2, Integer length) {
         Double r1;
         if (StringUtils.isBlank(amt1) || StringUtils.isBlank(amt2)) return 0d;

+ 36 - 0
src/main/java/com/winhc/bigdata/udf/split_names.java

@@ -10,6 +10,7 @@ import com.google.gson.reflect.TypeToken;
 
 import java.util.*;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * @Author: π
@@ -34,6 +35,40 @@ public class split_names extends UDF {
         }
     }
 
+    /**
+     * Appends {@code suffix} to every non-blank entry of {@code list}.
+     *
+     * @param list   source values; may be {@code null} or empty
+     * @param suffix text concatenated to the end of each kept entry
+     * @return the suffixed values without duplicates, in first-seen order; an empty list when
+     *         {@code list} is {@code null} or empty
+     */
+    public List<String> evaluate(List<String> list, String suffix) {
+        if (list == null || list.isEmpty()) {
+            return Collections.emptyList();
+        }
+        // LinkedHashSet drops duplicates while keeping first-seen order.
+        Set<String> suffixed = new LinkedHashSet<>();
+        for (String item : list) {
+            if (org.apache.commons.lang3.StringUtils.isNotBlank(item)) {
+                suffixed.add(item + suffix);
+            }
+        }
+        return new ArrayList<>(suffixed);
+    }
+
+
+    /**
+     * Merges any number of string lists into one list of distinct, non-blank values.
+     *
+     * <p>A {@code null} argument list is skipped rather than raising a
+     * {@link NullPointerException}, so one missing input does not fail the whole call.
+     *
+     * @param list the lists to merge; the array and any of its elements may be {@code null}
+     * @return the distinct non-blank values in first-seen order; empty when nothing was supplied
+     */
+    public List<String> evaluate(List<String>... list) {
+        if (list == null) {
+            return Collections.emptyList();
+        }
+        return Stream.of(list)
+                .filter(Objects::nonNull)
+                .flatMap(Collection::stream)
+                .filter(org.apache.commons.lang3.StringUtils::isNotBlank)
+                .distinct()
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * Parses several JSON string arrays joined by {@code &} (for example
+     * {@code ["11"]&["22"]}) and merges them into one list of distinct, non-blank values.
+     *
+     * <p>Blank segments (as produced by a leading {@code &} or by {@code &&}) and segments that
+     * parse to no array are skipped instead of causing a {@link NullPointerException}. A segment
+     * that is not valid JSON still raises the parser's exception.
+     *
+     * @param lists the {@code &}-separated JSON arrays; may be {@code null} or blank
+     * @return the distinct non-blank values in first-seen order; an empty list for blank input
+     */
+    public List<String> evaluate(String lists) {
+
+        if (StringUtils.isBlank(lists)) {
+            return new ArrayList<>();
+        }
+        return Arrays.stream(lists.split("&"))
+                // A blank segment is not a JSON array; skip it.
+                .filter(StringUtils::isNotBlank)
+                .map(segment -> JSON.parseArray(segment))
+                // NOTE(review): assuming JSON is fastjson, parseArray returns null for the text "null".
+                .filter(Objects::nonNull)
+                .flatMap(array -> array.toJavaList(String.class).stream())
+                .filter(StringUtils::isNotBlank)
+                .distinct()
+                .collect(Collectors.toList());
+    }
+
     public List<String> evaluate(String supplier_info, String purchaser_info, String winner_candidate, String wtbamt_info, String json_path) {
         List<String> list1 = toList(supplier_info, json_path, "_supplier");
         List<String> list2 = toList(purchaser_info, json_path, "_purchaser");
@@ -86,6 +121,7 @@ public class split_names extends UDF {
         //String json_path = "$[*]";
         split_names n = new split_names();
         System.out.println(n.evaluate(jsons, jsons, jsons, jsons, json_path));
+        System.out.println(n.evaluate("[\"11\"]&[\"22\"]"));
     }
 
 }