|
@@ -3,6 +3,7 @@ package com.winhc.bigdata.utils;
|
|
import com.alibaba.fastjson.JSONObject;
|
|
import com.alibaba.fastjson.JSONObject;
|
|
import com.aliyun.odps.utils.StringUtils;
|
|
import com.aliyun.odps.utils.StringUtils;
|
|
import com.github.houbb.opencc4j.util.ZhConverterUtil;
|
|
import com.github.houbb.opencc4j.util.ZhConverterUtil;
|
|
|
|
+import com.winhc.bigdata.udf.etl.CompanyPhoneOrEmailMerge;
|
|
|
|
|
|
import java.util.Arrays;
|
|
import java.util.Arrays;
|
|
import java.util.Collections;
|
|
import java.util.Collections;
|
|
@@ -24,6 +25,17 @@ public class CompanyUtils {
|
|
return Arrays.stream(val.split("\t;\t")).map(String::trim).filter(StringUtils::isNotBlank).map(CompanyUtils::valTrim).collect(Collectors.toList());
|
|
return Arrays.stream(val.split("\t;\t")).map(String::trim).filter(StringUtils::isNotBlank).map(CompanyUtils::valTrim).collect(Collectors.toList());
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ public static String cleanup2(String s) {
|
|
|
|
+ Pattern pattern = Pattern.compile("[\\u4e00-\\u9fa5]");
|
|
|
|
+ if (StringUtils.isBlank(s)) return "";
|
|
|
|
+ return pattern.matcher(s).replaceAll("");
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static List<String> spiltNamesPlus(String val) {
|
|
|
|
+ return spiltNames(val).stream().map(CompanyUtils::cleanup2)
|
|
|
|
+ .filter(StringUtils::isNotBlank).collect(Collectors.toList());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
|
|
private static String valTrim(String val) {
|
|
private static String valTrim(String val) {
|
|
if (StringUtils.isBlank(val)) {
|
|
if (StringUtils.isBlank(val)) {
|