BaseUtils.java 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. package com.winhc.task.util;
  2. import com.alibaba.fastjson.JSON;
  3. import com.alibaba.fastjson.JSONArray;
  4. import com.alibaba.fastjson.JSONObject;
  5. import com.alibaba.fastjson.JSONPath;
  6. import lombok.val;
  7. import org.apache.commons.codec.digest.DigestUtils;
  8. import org.apache.commons.collections.CollectionUtils;
  9. import org.apache.commons.lang3.StringUtils;
  10. import org.apache.hadoop.hbase.Cell;
  11. import org.apache.hadoop.hbase.CellUtil;
  12. import org.apache.hadoop.hbase.TableName;
  13. import org.apache.hadoop.hbase.client.Result;
  14. import org.apache.hadoop.hbase.util.Bytes;
  15. import java.text.DecimalFormat;
  16. import java.text.ParseException;
  17. import java.text.SimpleDateFormat;
  18. import java.util.*;
  19. import java.util.concurrent.ForkJoinPool;
  20. import java.util.function.Function;
  21. import java.util.regex.Pattern;
  22. import java.util.stream.Collectors;
  23. import java.util.stream.Stream;
  24. /**
  25. * @author π
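  26. * @Description: General-purpose static helpers: string and name cleanup, date and amount parsing, fastjson / HBase Result conversion, and row-key generation.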
  27. * @date 2021/10/19 14:15
  28. */
  29. public class BaseUtils {
  30. public static Boolean isWindows() {
  31. return System.getProperty("os.name").contains("Windows");
  32. }
  // Matches any single character that is NOT a CJK ideograph in U+4E00..U+9FA5, an ASCII digit
  // or an ASCII letter. cleanup(String) deletes every match of this pattern.
  33. private static final Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z]");
  34. public static <T> List<T> tryToJavaList(JSONArray array, Class<T> clazz) {
  35. if (CollectionUtils.isEmpty(array)) {
  36. return Collections.emptyList();
  37. }
  38. return array.toJavaList(clazz);
  39. }
  40. public static String getFormatDateString(JSONObject j, String field) {
  41. return formatDate(j.getString(field));
  42. }
  43. public static String cleanup(String s) {
  44. if (StringUtils.isBlank(s)) return "";
  45. return pattern.matcher(s).replaceAll("");
  46. }
  47. public static String dim2tn(String dim) {
  48. return "NG_RT_" + dim;
  49. }
  50. public static String equity_info_rowkey(JSONArray pledgee_info, JSONArray pledgor_info) {
  51. return getKey2Str("pledgee", pledgee_info) + " " + getKey2Str("pledgor", pledgor_info);
  52. }
  53. public static String getKey2Str(String key, JSONArray value) {
  54. if (value == null || value.isEmpty()) {
  55. return "";
  56. }
  57. HashSet<String> set = new HashSet<>();
  58. value.forEach(x -> {
  59. JSONObject j = (JSONObject) x;
  60. set.add(j.getString(key));
  61. });
  62. return set.stream().distinct().sorted().collect(Collectors.joining("、"));
  63. }
  64. public static String getCompanyCourtOpenAnnouncementRowkey(String companyId, JSONObject j) {
  65. String start_date = splitDate(j.getString("start_date"));
  66. if (StringUtils.isEmpty(start_date)) {
  67. return null;
  68. }
  69. JSONArray defendant_info = Optional.ofNullable(j.getJSONArray("defendant_info")).orElseGet(JSONArray::new);
  70. JSONArray plaintiff_info = Optional.ofNullable(j.getJSONArray("plaintiff_info")).orElseGet(JSONArray::new);
  71. String sorted_litigant = StreamEnhance.append(defendant_info.toJavaList(JSONObject.class).stream(), plaintiff_info.toJavaList(JSONObject.class).stream())
  72. .map(l -> l.getString("name"))
  73. .filter(StringUtils::isNotEmpty)
  74. .distinct()
  75. .sorted()
  76. .collect(Collectors.joining());
  77. if (StringUtils.isEmpty(sorted_litigant)) {
  78. return null;
  79. }
  80. return md5(cleanup(concatws("", start_date, sorted_litigant)));
  81. }
  82. public static String get_text_from_json(String json, String name) {
  83. if (StringUtils.isEmpty(json)) return "";
  84. try {
  85. List<JSONObject> list = JSON.parseArray(json, JSONObject.class);
  86. if (list.isEmpty()) return "";
  87. return list.stream().map(d -> d.getString(name))
  88. .filter(StringUtils::isNotBlank)
  89. .sorted(Comparator.naturalOrder())
  90. .collect(Collectors.joining(""));
  91. } catch (Exception e) {
  92. return "";
  93. }
  94. }
  // Step 2 of cleanupChange: matches every character that is not a CJK ideograph in
  // U+4E00..U+9FA5, an ASCII digit, an ASCII letter or one of the parenthesis characters
  // listed in the class.
  95. private static final Pattern first_p = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z()()]");
  // Step 3 of cleanupChange: matches one parenthesised group anchored at the end of the input
  // (an opening parenthesis, one or more characters that are not an opening parenthesis, then
  // a closing parenthesis).
  96. private static final Pattern second_p = Pattern.compile("[((][^((]+[))]$");
  // Step 4 of cleanupChange: same character class as the class-level `pattern` constant,
  // i.e. it also matches the parentheses that first_p let through.
  97. private static final Pattern third_p = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z]");
  // Step 1 of cleanupChange: matches a tag-like span, '<' followed by one or more non-'>'
  // characters and a closing '>'.
  98. private static final Pattern html_p = Pattern.compile("<[^>]+>");
  // Matches a whole string shaped like dddd-dd-dd (digits only, no range validation).
  // NOTE(review): not referenced by any method visible in this class - confirm before removing.
  99. private static final Pattern date_format_p = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}$");
  // Matches a single 年 or 月 character.
  // NOTE(review): not referenced by any method visible in this class - confirm before removing.
  100. private static final Pattern replace_char_p = Pattern.compile("[年月]");
  101. public static String cleanupChange(String val) {
  102. if (com.aliyun.odps.utils.StringUtils.isEmpty(val)) return "";
  103. val = html_p.matcher(val).replaceAll("");
  104. val = first_p.matcher(val).replaceAll("");
  105. val = second_p.matcher(val).replaceAll("");
  106. return third_p.matcher(val).replaceAll("");
  107. }
  108. public static String substr(String str, Integer num) {
  109. if (StringUtils.isEmpty(str)) {
  110. return "";
  111. } else {
  112. return str.substring(num);
  113. }
  114. }
  115. public static String formatDate(String date) {
  116. if (StringUtils.isEmpty(date)) return null;
  117. if (date.contains(" ")) return date.split(" ")[0];
  118. return date;
  119. }
  120. public static String toDBC(String str) {
  121. if (str == null) {
  122. return null;
  123. }
  124. char[] c = str.toCharArray();
  125. for (int i = 0; i < c.length; i++) {
  126. if (c[i] == '\u3000') {
  127. c[i] = ' ';
  128. } else if (c[i] > '\uFF00' && c[i] < '\uFF5F') {
  129. c[i] = (char) (c[i] - 65248);
  130. }
  131. }
  132. return new String(c);
  133. }
  134. public static String md5(String s) {
  135. return DigestUtils.md5Hex(s);
  136. }
  137. public static String concatws(String seq, String... strings) {
  138. return Stream.of(strings).filter(Objects::nonNull).collect(Collectors.joining(seq));
  139. }
  140. public static TableName getHbaseTable(String dim) {
  141. return TableName.valueOf("NG_RT_" + dim.toUpperCase(Locale.ROOT));
  142. }
  143. public static JSONObject toJSONObjectLowerCase(Result r) {
  144. if (r == null || r.isEmpty()) {
  145. return null;
  146. }
  147. val rowkey = getROWString(r);
  148. JSONObject result = new JSONObject();
  149. result.put("rowkey", rowkey);
  150. for (Cell cell : r.listCells()) {
  151. val key = Bytes.toString(CellUtil.cloneQualifier(cell));
  152. val value = Bytes.toString(CellUtil.cloneValue(cell));
  153. result.put(key.toLowerCase(Locale.ENGLISH), StringUtils.isBlank(value) ? null : value);
  154. }
  155. return result;
  156. }
  157. public static String getROWString(Result r) {
  158. return Bytes.toString(r.getRow());
  159. }
  160. public static JSONObject transFields(JSONObject j, List<String> exportFields, Function<JSONObject, JSONObject> handles) {
  161. JSONObject tmp = new JSONObject(j);
  162. if (handles != null) {
  163. tmp = handles.apply(j);
  164. }
  165. JSONObject finalJ = new JSONObject();
  166. tmp.forEach((k, v) -> {
  167. if (exportFields.contains(k)) {
  168. finalJ.put(k, v);
  169. }
  170. });
  171. return finalJ;
  172. }
  173. public static String splitDate(String date) {
  174. try {
  175. if (StringUtils.isEmpty(date)) {
  176. return null;
  177. } else {
  178. return date.split(" ")[0];
  179. }
  180. } catch (Exception e) {
  181. return null;
  182. }
  183. }
  184. public static String getCompanyOrgTypeStd(String company_name, String company_org_type2, String credit_code) {
  185. if (StringUtils.isBlank(company_name)) return null;
  186. Pattern pattern1 = Pattern.compile("[^\\u4e00-\\u9fa5()()]");
  187. if (credit_code != null && credit_code.startsWith("93")) {
  188. return "集体所有制";
  189. }
  190. if (company_name.contains("有限责任公司")) return "有限责任公司";
  191. else if (company_name.contains("股份")) return "股份有限公司";
  192. else if (company_name.contains("有限公司")) return "有限责任公司";
  193. else {
  194. if (StringUtils.isEmpty(company_org_type2)) return null;
  195. String company_org_type = pattern1.matcher(company_org_type2).replaceAll("");
  196. if (StringUtils.isEmpty(company_org_type)) return null;
  197. if (company_org_type.contains("有限责任公司") || company_org_type.contains("一人有限责任公司") || company_org_type.contains("一人有限责任"))
  198. return "有限责任公司";
  199. else if (company_org_type.contains("股份")) return "股份有限公司";
  200. else if (company_org_type.contains("个人独资企业")) return "独资企业";
  201. else if (company_org_type.contains("个人经营") || company_org_type.contains("个体")) return "个体工商户";
  202. else if (company_org_type.contains("专业合作") || company_org_type.contains("合作社") || company_org_type.contains("集体"))
  203. return "集体所有制";
  204. else if (company_org_type.contains("有限") && company_org_type.contains("合伙")) return "有限合伙";
  205. else if (company_org_type.contains("合伙")) return "普通合伙";
  206. else if (company_org_type.contains("国有") || company_org_type.contains("全民所有制") || company_org_type.contains("国资"))
  207. return "国企";
  208. else if (company_org_type.contains("外商") || company_org_type.contains("中外") || company_org_type.contains("外国") || company_org_type.contains("外资"))
  209. return "外商投资企业";
  210. else if (company_org_type.contains("联营")) return "联营企业";
  211. else return null;
  212. }
  213. }
  214. public static <V> V computeIfAbsentWithType(JSONObject j, String key, Class<V> clazz, Function<JSONObject, V> mappingFunction) {
  215. Objects.requireNonNull(mappingFunction);
  216. V v;
  217. if ((v = j.getObject(key, clazz)) == null) {
  218. V newValue;
  219. if ((newValue = mappingFunction.apply(j)) != null) {
  220. j.put(key, newValue);
  221. return newValue;
  222. }
  223. }
  224. return v;
  225. }
  226. public static Double companyScoreWeight(String reg_status, String cname, String reg_capital_amount, String company_type) {
  227. if (cname == null || cleanup(cname).replaceAll("[0-9]", "").length() == 1)
  228. return 0.01D;
  229. if (cleanup(cname).replaceAll("[0-9]", "").length() <= 3)
  230. return 0.3D;
  231. if (reg_status == null || reg_status.contains("销") || reg_status.contains("消"))
  232. return 1D;
  233. Double amount = reg_capital_amount == null ? 0L : Double.parseDouble(reg_capital_amount);
  234. Double w = Math.log(amount / 10000000 + 1) + 1;
  235. if ("1".equals(company_type))
  236. w = w + 3;
  237. return w;
  238. }
  239. public static JSONObject getCompanyName(String name) {
  240. if (StringUtils.isEmpty(name)) return null;
  241. else {
  242. String value = cleanup(name);
  243. JSONObject j = new JSONObject();
  244. j.put("show", name.replaceAll("\t;", ""));
  245. j.put("value", value);
  246. return j;
  247. }
  248. }
  249. public static String toMillisTimestamp(String date) {
  250. if (StringUtils.isEmpty(date)) {
  251. return null;
  252. }
  253. String p = "yyyy-MM-dd HH:mm:ss";
  254. if (date.length() == 10) {
  255. p = "yyyy-MM-dd";
  256. }
  257. SimpleDateFormat fm = new SimpleDateFormat(p);
  258. try {
  259. return String.valueOf(fm.parse(date).getTime());
  260. } catch (Exception e) {
  261. return null;
  262. }
  263. }
  264. public static List<String> getPhonesEmails(String str) {
  265. if (StringUtils.isEmpty(str)) {
  266. return null;
  267. } else {
  268. return Arrays.stream(str.split("\t;\t"))
  269. .map(x -> x.replaceAll("\t;", ""))
  270. .collect(Collectors.toList());
  271. }
  272. }
  273. public static String getGeo(String lat, String lng) {
  274. if (StringUtils.isEmpty(lat) || StringUtils.isEmpty(lng)) {
  275. return null;
  276. }
  277. return lat + "," + lng;
  278. }
  279. public static final Map<String, Double> unit_w = new HashMap<String, Double>() {{
  280. put("欧元", 7.9145);
  281. put("英镑", 8.7751);
  282. put("美元", 6.5301);
  283. put("港元", 0.8425);
  284. put("港币", 0.8425);
  285. put("台币", 0.2316);
  286. put("日元", 0.06270);
  287. put("加拿大元", 5.1096);
  288. put("新加坡元", 4.8907);
  289. put("澳元", 4.846);
  290. put("卢布", 0.08825);
  291. put("泰铢", 0.2162);
  292. put("韩元", 0.006025);
  293. put("澳门元", 0.8178);
  294. }};
  295. public static String avoidNullString(String s) {
  296. if (StringUtils.isBlank(s)) return "";
  297. return s;
  298. }
  299. public static boolean cleanEquals(String a, String b) {
  300. return cleanup(a).equals(cleanup(b));
  301. }
  302. public static double tryAmount(String regCapital) {
  303. Double amount = getAmount(regCapital);
  304. if (amount == null) return 0d;
  305. return amount;
  306. }
  307. public static Double getAmount(String regCapital) {
  308. DecimalFormat df = new DecimalFormat("00.00");
  309. if (StringUtils.isEmpty(regCapital))
  310. return null;
  311. double unit = unit_w.entrySet().stream()
  312. .filter(e -> regCapital.contains(e.getKey()))
  313. .mapToDouble(Map.Entry::getValue)
  314. .findFirst().orElse(1d);
  315. if (regCapital.contains("亿")) {
  316. unit *= 100000000.0d;
  317. } else if (regCapital.contains("百万")) {
  318. unit *= 1000000.0d;
  319. } else if (regCapital.contains("万")) {
  320. unit *= 10000.0d;
  321. }
  322. unit *= 100.0d;
  323. try {
  324. double amount = Double.parseDouble(regCapital.replaceAll("[^0-9.]", " ").split("\\s+")[0]);
  325. return Double.valueOf(df.format(amount * unit));
  326. } catch (Exception e) {
  327. return null;
  328. }
  329. }
  330. public static List<String> parseJsonArray(String json, String json_path) {
  331. if (StringUtils.isBlank(json) || StringUtils.isBlank(json_path) || "[]".equalsIgnoreCase(json)) {
  332. return Collections.emptyList();
  333. }
  334. return ((JSONArray) JSONPath.eval(JSON.parse(json), json_path)).stream()
  335. .filter(x -> x != null && StringUtils.isNotBlank(x.toString()))
  336. .map(Object::toString)
  337. .distinct()
  338. .collect(Collectors.toList());
  339. }
  340. public static String nameCleanup(String s) {
  341. if (StringUtils.isBlank(s)) return "";
  342. Pattern pattern = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z()() ·,]");
  343. return pattern.matcher(s.replaceAll("[、,;]", ","))
  344. .replaceAll("").replaceAll(" +", " ");
  345. }
  346. public static String trim_url_domain(String url) {
  347. Pattern url_pattern = Pattern.compile("^((http://)|(https://))?(www\\.)?([0-9a-zA-Z\\-_\\u4e00-\\u9fa5]+(\\.[a-zA-Z\\u4e00-\\u9fa5]+){1,2}).*$");
  348. if (StringUtils.isEmpty(url)) {
  349. return null;
  350. } else {
  351. val matcher = url_pattern.matcher(url);
  352. if (matcher.matches()) {
  353. return matcher.group(5);
  354. } else {
  355. return null;
  356. }
  357. }
  358. }
  359. public static String upper(String url) {
  360. if (StringUtils.isBlank(url)) return url;
  361. return url.toUpperCase();
  362. }
  363. public static String toStringV2(Object str) {
  364. return str == null ? "" : str.toString();
  365. }
  366. public static String getKey(JSONObject j, String key) {
  367. if (j == null) return "";
  368. String r = j.getString(key);
  369. return r == null ? "" : r;
  370. }
  371. public static List<String> parse_array(String url) {
  372. if (StringUtils.isEmpty(url)) {
  373. return new ArrayList<>();
  374. } else {
  375. return Arrays.stream(url.replaceAll("[;\t\n;,。]", ",")
  376. .split(","))
  377. .filter(StringUtils::isNotBlank)
  378. .distinct()
  379. .collect(Collectors.toList());
  380. }
  381. }
  382. public static ForkJoinPool COMMON_POOL = new ForkJoinPool(Math.max(Runtime.getRuntime().availableProcessors() * 3, 16));
  // Manual smoke test: prints the result of several helpers for hard-coded sample inputs.
  // Nothing is asserted; the output is meant to be inspected by eye.
  383. public static void main(String[] args) throws ParseException {
  // u1..u3 are sample domain lists that are declared but never read below.
  384. String u1 = "www.baijinggame.cn,baijinggame.com,www.baijinggame.com,yuxianweb.cn,www.yuxianweb.cn";
  385. String u2 = "https://网络.中国,www.yuxianweb.com,baijinggame.cn,yuxianweb.cn,www.yuxianweb.cn";
  386. String u3 = "https://网络.中国,www.yuxianweb.com,baijinggame.cn,www.baijinggame.cn,baijinggame.com,www.baijinggame.com,yuxianweb.cn,www.yuxianweb.cn";
  // URL helpers.
  387. System.out.println(upper("https://网络.中国"));
  388. System.out.println(trim_url_domain("https://网络.中国"));
  // Name cleanup and JSONPath extraction.
  389. System.out.println(nameCleanup("小米科技.;有,@限公 司 雷军"));
  390. System.out.println(parseJsonArray("[{\"name\":\"张海林\",\"litigant_id\":\"\"},{\"name\":\"招商银行股份有限公司信用卡中心\",\"litigant_id\":null}]", "$.litigant_id"));
  // Amount, geo, contact list and timestamp parsing.
  391. System.out.println(getAmount("100万人民币"));
  392. System.out.println(getGeo("29.2487411737156", "115.821420023591"));
  393. System.out.println(getPhonesEmails("0792-4395333\t;\t18624366098\t;\t17779218631\t;\t"));
  394. System.out.println(toMillisTimestamp("2021-11-12 12:20:30"));
  // Company name, organisation type and score weight.
  395. System.out.println(getCompanyName("新疆现代特油科技(股份)有限公司"));
  396. System.out.println(getCompanyOrgTypeStd("小米有", "有限责任公司", "12346"));
  397. System.out.println(companyScoreWeight("存续(在营、开业、在册)", "新疆现代特油科技股份有限公司", "200309577000000", "1"));
  398. }
  399. }