123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456 |
- package com.winhc.task.util;
- import com.alibaba.fastjson.JSON;
- import com.alibaba.fastjson.JSONArray;
- import com.alibaba.fastjson.JSONObject;
- import com.alibaba.fastjson.JSONPath;
- import lombok.val;
- import org.apache.commons.codec.digest.DigestUtils;
- import org.apache.commons.collections.CollectionUtils;
- import org.apache.commons.lang3.StringUtils;
- import org.apache.hadoop.hbase.Cell;
- import org.apache.hadoop.hbase.CellUtil;
- import org.apache.hadoop.hbase.TableName;
- import org.apache.hadoop.hbase.client.Result;
- import org.apache.hadoop.hbase.util.Bytes;
- import java.text.DecimalFormat;
- import java.text.ParseException;
- import java.text.SimpleDateFormat;
- import java.util.*;
- import java.util.concurrent.ForkJoinPool;
- import java.util.function.Function;
- import java.util.regex.Pattern;
- import java.util.stream.Collectors;
- import java.util.stream.Stream;
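- /**
- * Static helper methods: string cleanup, date splitting, registered-capital amount parsing,
- * and HBase/JSON conversion.
- *
- * @author π
- * @date 2021/10/19 14:15
- */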
- public class BaseUtils {
- public static Boolean isWindows() {
- return System.getProperty("os.name").contains("Windows");
- }
- private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z]"); // matches every character that is not a CJK ideograph (U+4E00-U+9FA5), an ASCII digit or an ASCII letter; used by cleanup()
- public static <T> List<T> tryToJavaList(JSONArray array, Class<T> clazz) {
- if (CollectionUtils.isEmpty(array)) {
- return Collections.emptyList();
- }
- return array.toJavaList(clazz);
- }
- public static String getFormatDateString(JSONObject j, String field) {
- return formatDate(j.getString(field));
- }
- public static String cleanup(String s) {
- if (StringUtils.isBlank(s)) return "";
- return pattern.matcher(s).replaceAll("");
- }
- public static String dim2tn(String dim) {
- return "NG_RT_" + dim;
- }
- public static String equity_info_rowkey(JSONArray pledgee_info, JSONArray pledgor_info) {
- return getKey2Str("pledgee", pledgee_info) + " " + getKey2Str("pledgor", pledgor_info);
- }
- public static String getKey2Str(String key, JSONArray value) {
- if (value == null || value.isEmpty()) {
- return "";
- }
- HashSet<String> set = new HashSet<>();
- value.forEach(x -> {
- JSONObject j = (JSONObject) x;
- set.add(j.getString(key));
- });
- return set.stream().distinct().sorted().collect(Collectors.joining("、"));
- }
- public static String getCompanyCourtOpenAnnouncementRowkey(String companyId, JSONObject j) {
- String start_date = splitDate(j.getString("start_date"));
- if (StringUtils.isEmpty(start_date)) {
- return null;
- }
- JSONArray defendant_info = Optional.ofNullable(j.getJSONArray("defendant_info")).orElseGet(JSONArray::new);
- JSONArray plaintiff_info = Optional.ofNullable(j.getJSONArray("plaintiff_info")).orElseGet(JSONArray::new);
- String sorted_litigant = StreamEnhance.append(defendant_info.toJavaList(JSONObject.class).stream(), plaintiff_info.toJavaList(JSONObject.class).stream())
- .map(l -> l.getString("name"))
- .filter(StringUtils::isNotEmpty)
- .distinct()
- .sorted()
- .collect(Collectors.joining());
- if (StringUtils.isEmpty(sorted_litigant)) {
- return null;
- }
- return md5(cleanup(concatws("", start_date, sorted_litigant)));
- }
- public static String get_text_from_json(String json, String name) {
- if (StringUtils.isEmpty(json)) return "";
- try {
- List<JSONObject> list = JSON.parseArray(json, JSONObject.class);
- if (list.isEmpty()) return "";
- return list.stream().map(d -> d.getString(name))
- .filter(StringUtils::isNotBlank)
- .sorted(Comparator.naturalOrder())
- .collect(Collectors.joining(""));
- } catch (Exception e) {
- return "";
- }
- }
- private static final Pattern first_p = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z()()]");
- private static final Pattern second_p = Pattern.compile("[((][^((]+[))]$");
- private static final Pattern third_p = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z]");
- private static final Pattern html_p = Pattern.compile("<[^>]+>");
- private static final Pattern date_format_p = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}$");
- private static final Pattern replace_char_p = Pattern.compile("[年月]");
- public static String cleanupChange(String val) {
- if (com.aliyun.odps.utils.StringUtils.isEmpty(val)) return "";
- val = html_p.matcher(val).replaceAll("");
- val = first_p.matcher(val).replaceAll("");
- val = second_p.matcher(val).replaceAll("");
- return third_p.matcher(val).replaceAll("");
- }
- public static String substr(String str, Integer num) {
- if (StringUtils.isEmpty(str)) {
- return "";
- } else {
- return str.substring(num);
- }
- }
- public static String formatDate(String date) {
- if (StringUtils.isEmpty(date)) return null;
- if (date.contains(" ")) return date.split(" ")[0];
- return date;
- }
- public static String toDBC(String str) {
- if (str == null) {
- return null;
- }
- char[] c = str.toCharArray();
- for (int i = 0; i < c.length; i++) {
- if (c[i] == '\u3000') {
- c[i] = ' ';
- } else if (c[i] > '\uFF00' && c[i] < '\uFF5F') {
- c[i] = (char) (c[i] - 65248);
- }
- }
- return new String(c);
- }
- public static String md5(String s) {
- return DigestUtils.md5Hex(s);
- }
- public static String concatws(String seq, String... strings) {
- return Stream.of(strings).filter(Objects::nonNull).collect(Collectors.joining(seq));
- }
- public static TableName getHbaseTable(String dim) {
- return TableName.valueOf("NG_RT_" + dim.toUpperCase(Locale.ROOT));
- }
- public static JSONObject toJSONObjectLowerCase(Result r) {
- if (r == null || r.isEmpty()) {
- return null;
- }
- val rowkey = getROWString(r);
- JSONObject result = new JSONObject();
- result.put("rowkey", rowkey);
- for (Cell cell : r.listCells()) {
- val key = Bytes.toString(CellUtil.cloneQualifier(cell));
- val value = Bytes.toString(CellUtil.cloneValue(cell));
- result.put(key.toLowerCase(Locale.ENGLISH), StringUtils.isBlank(value) ? null : value);
- }
- return result;
- }
- public static String getROWString(Result r) {
- return Bytes.toString(r.getRow());
- }
- public static JSONObject transFields(JSONObject j, List<String> exportFields, Function<JSONObject, JSONObject> handles) {
- JSONObject tmp = new JSONObject(j);
- if (handles != null) {
- tmp = handles.apply(j);
- }
- JSONObject finalJ = new JSONObject();
- tmp.forEach((k, v) -> {
- if (exportFields.contains(k)) {
- finalJ.put(k, v);
- }
- });
- return finalJ;
- }
- public static String splitDate(String date) {
- try {
- if (StringUtils.isEmpty(date)) {
- return null;
- } else {
- return date.split(" ")[0];
- }
- } catch (Exception e) {
- return null;
- }
- }
- public static String getCompanyOrgTypeStd(String company_name, String company_org_type2, String credit_code) {
- if (StringUtils.isBlank(company_name)) return null;
- Pattern pattern1 = Pattern.compile("[^\\u4e00-\\u9fa5()()]");
- if (credit_code != null && credit_code.startsWith("93")) {
- return "集体所有制";
- }
- if (company_name.contains("有限责任公司")) return "有限责任公司";
- else if (company_name.contains("股份")) return "股份有限公司";
- else if (company_name.contains("有限公司")) return "有限责任公司";
- else {
- if (StringUtils.isEmpty(company_org_type2)) return null;
- String company_org_type = pattern1.matcher(company_org_type2).replaceAll("");
- if (StringUtils.isEmpty(company_org_type)) return null;
- if (company_org_type.contains("有限责任公司") || company_org_type.contains("一人有限责任公司") || company_org_type.contains("一人有限责任"))
- return "有限责任公司";
- else if (company_org_type.contains("股份")) return "股份有限公司";
- else if (company_org_type.contains("个人独资企业")) return "独资企业";
- else if (company_org_type.contains("个人经营") || company_org_type.contains("个体")) return "个体工商户";
- else if (company_org_type.contains("专业合作") || company_org_type.contains("合作社") || company_org_type.contains("集体"))
- return "集体所有制";
- else if (company_org_type.contains("有限") && company_org_type.contains("合伙")) return "有限合伙";
- else if (company_org_type.contains("合伙")) return "普通合伙";
- else if (company_org_type.contains("国有") || company_org_type.contains("全民所有制") || company_org_type.contains("国资"))
- return "国企";
- else if (company_org_type.contains("外商") || company_org_type.contains("中外") || company_org_type.contains("外国") || company_org_type.contains("外资"))
- return "外商投资企业";
- else if (company_org_type.contains("联营")) return "联营企业";
- else return null;
- }
- }
- public static <V> V computeIfAbsentWithType(JSONObject j, String key, Class<V> clazz, Function<JSONObject, V> mappingFunction) {
- Objects.requireNonNull(mappingFunction);
- V v;
- if ((v = j.getObject(key, clazz)) == null) {
- V newValue;
- if ((newValue = mappingFunction.apply(j)) != null) {
- j.put(key, newValue);
- return newValue;
- }
- }
- return v;
- }
- public static Double companyScoreWeight(String reg_status, String cname, String reg_capital_amount, String company_type) {
- if (cname == null || cleanup(cname).replaceAll("[0-9]", "").length() == 1)
- return 0.01D;
- if (cleanup(cname).replaceAll("[0-9]", "").length() <= 3)
- return 0.3D;
- if (reg_status == null || reg_status.contains("销") || reg_status.contains("消"))
- return 1D;
- Double amount = reg_capital_amount == null ? 0L : Double.parseDouble(reg_capital_amount);
- Double w = Math.log(amount / 10000000 + 1) + 1;
- if ("1".equals(company_type))
- w = w + 3;
- return w;
- }
- public static JSONObject getCompanyName(String name) {
- if (StringUtils.isEmpty(name)) return null;
- else {
- String value = cleanup(name);
- JSONObject j = new JSONObject();
- j.put("show", name.replaceAll("\t;", ""));
- j.put("value", value);
- return j;
- }
- }
- public static String toMillisTimestamp(String date) {
- if (StringUtils.isEmpty(date)) {
- return null;
- }
- String p = "yyyy-MM-dd HH:mm:ss";
- if (date.length() == 10) {
- p = "yyyy-MM-dd";
- }
- SimpleDateFormat fm = new SimpleDateFormat(p);
- try {
- return String.valueOf(fm.parse(date).getTime());
- } catch (Exception e) {
- return null;
- }
- }
- public static List<String> getPhonesEmails(String str) {
- if (StringUtils.isEmpty(str)) {
- return null;
- } else {
- return Arrays.stream(str.split("\t;\t"))
- .map(x -> x.replaceAll("\t;", ""))
- .collect(Collectors.toList());
- }
- }
- public static String getGeo(String lat, String lng) {
- if (StringUtils.isEmpty(lat) || StringUtils.isEmpty(lng)) {
- return null;
- }
- return lat + "," + lng;
- }
- public static final Map<String, Double> unit_w = new HashMap<String, Double>() {{
- put("欧元", 7.9145);
- put("英镑", 8.7751);
- put("美元", 6.5301);
- put("港元", 0.8425);
- put("港币", 0.8425);
- put("台币", 0.2316);
- put("日元", 0.06270);
- put("加拿大元", 5.1096);
- put("新加坡元", 4.8907);
- put("澳元", 4.846);
- put("卢布", 0.08825);
- put("泰铢", 0.2162);
- put("韩元", 0.006025);
- put("澳门元", 0.8178);
- }};
- public static String avoidNullString(String s) {
- if (StringUtils.isBlank(s)) return "";
- return s;
- }
- public static boolean cleanEquals(String a, String b) {
- return cleanup(a).equals(cleanup(b));
- }
- public static double tryAmount(String regCapital) {
- Double amount = getAmount(regCapital);
- if (amount == null) return 0d;
- return amount;
- }
- public static Double getAmount(String regCapital) {
- DecimalFormat df = new DecimalFormat("00.00");
- if (StringUtils.isEmpty(regCapital))
- return null;
- double unit = unit_w.entrySet().stream()
- .filter(e -> regCapital.contains(e.getKey()))
- .mapToDouble(Map.Entry::getValue)
- .findFirst().orElse(1d);
- if (regCapital.contains("亿")) {
- unit *= 100000000.0d;
- } else if (regCapital.contains("百万")) {
- unit *= 1000000.0d;
- } else if (regCapital.contains("万")) {
- unit *= 10000.0d;
- }
- unit *= 100.0d;
- try {
- double amount = Double.parseDouble(regCapital.replaceAll("[^0-9.]", " ").split("\\s+")[0]);
- return Double.valueOf(df.format(amount * unit));
- } catch (Exception e) {
- return null;
- }
- }
- public static List<String> parseJsonArray(String json, String json_path) {
- if (StringUtils.isBlank(json) || StringUtils.isBlank(json_path) || "[]".equalsIgnoreCase(json)) {
- return Collections.emptyList();
- }
- return ((JSONArray) JSONPath.eval(JSON.parse(json), json_path)).stream()
- .filter(x -> x != null && StringUtils.isNotBlank(x.toString()))
- .map(Object::toString)
- .distinct()
- .collect(Collectors.toList());
- }
- public static String nameCleanup(String s) {
- if (StringUtils.isBlank(s)) return "";
- Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z()() ·,]");
- return pattern.matcher(s.replaceAll("[、,;]", ","))
- .replaceAll("").replaceAll(" +", " ");
- }
- public static String trim_url_domain(String url) {
- Pattern url_pattern = Pattern.compile("^((http://)|(https://))?(www\\.)?([0-9a-zA-Z\\-_\\u4e00-\\u9fa5]+(\\.[a-zA-Z\\u4e00-\\u9fa5]+){1,2}).*$");
- if (StringUtils.isEmpty(url)) {
- return null;
- } else {
- val matcher = url_pattern.matcher(url);
- if (matcher.matches()) {
- return matcher.group(5);
- } else {
- return null;
- }
- }
- }
- public static String upper(String url) {
- if (StringUtils.isBlank(url)) return url;
- return url.toUpperCase();
- }
- public static String toStringV2(Object str) {
- return str == null ? "" : str.toString();
- }
- public static String getKey(JSONObject j, String key) {
- if (j == null) return "";
- String r = j.getString(key);
- return r == null ? "" : r;
- }
- public static List<String> parse_array(String url) {
- if (StringUtils.isEmpty(url)) {
- return new ArrayList<>();
- } else {
- return Arrays.stream(url.replaceAll("[;\t\n;,。]", ",")
- .split(","))
- .filter(StringUtils::isNotBlank)
- .distinct()
- .collect(Collectors.toList());
- }
- }
- public static ForkJoinPool COMMON_POOL = new ForkJoinPool(Math.max(Runtime.getRuntime().availableProcessors() * 3, 16));
- public static void main(String[] args) throws ParseException {
- String u1 = "www.baijinggame.cn,baijinggame.com,www.baijinggame.com,yuxianweb.cn,www.yuxianweb.cn";
- String u2 = "https://网络.中国,www.yuxianweb.com,baijinggame.cn,yuxianweb.cn,www.yuxianweb.cn";
- String u3 = "https://网络.中国,www.yuxianweb.com,baijinggame.cn,www.baijinggame.cn,baijinggame.com,www.baijinggame.com,yuxianweb.cn,www.yuxianweb.cn";
- System.out.println(upper("https://网络.中国"));
- System.out.println(trim_url_domain("https://网络.中国"));
- System.out.println(nameCleanup("小米科技.;有,@限公 司 雷军"));
- System.out.println(parseJsonArray("[{\"name\":\"张海林\",\"litigant_id\":\"\"},{\"name\":\"招商银行股份有限公司信用卡中心\",\"litigant_id\":null}]", "$.litigant_id"));
- System.out.println(getAmount("100万人民币"));
- System.out.println(getGeo("29.2487411737156", "115.821420023591"));
- System.out.println(getPhonesEmails("0792-4395333\t;\t18624366098\t;\t17779218631\t;\t"));
- System.out.println(toMillisTimestamp("2021-11-12 12:20:30"));
- System.out.println(getCompanyName("新疆现代特油科技(股份)有限公司"));
- System.out.println(getCompanyOrgTypeStd("小米有", "有限责任公司", "12346"));
- System.out.println(companyScoreWeight("存续(在营、开业、在册)", "新疆现代特油科技股份有限公司", "200309577000000", "1"));
- }
- }
|