|
@@ -1,7 +1,6 @@
|
|
package com.winhc.bigdata.udf;
|
|
package com.winhc.bigdata.udf;
|
|
|
|
|
|
import com.aliyun.odps.udf.UDF;
|
|
import com.aliyun.odps.udf.UDF;
|
|
-import com.google.common.collect.Lists;
|
|
|
|
import org.apache.commons.lang.StringUtils;
|
|
import org.apache.commons.lang.StringUtils;
|
|
import org.apache.commons.lang.math.NumberUtils;
|
|
import org.apache.commons.lang.math.NumberUtils;
|
|
|
|
|
|
@@ -15,8 +14,8 @@ import java.util.regex.Pattern;
|
|
* @since 2021-08-27 10:45
|
|
* @since 2021-08-27 10:45
|
|
*/
|
|
*/
|
|
public class ToIntervalSalary extends UDF {
|
|
public class ToIntervalSalary extends UDF {
|
|
- private static final List<Integer> MIAN_YI = Lists.newArrayList(null, null);
|
|
|
|
- private static final Pattern p = Pattern.compile("[^0-9-.]");
|
|
|
|
|
|
+ private static final List<Integer> MIAN_YI = Arrays.asList(null, null);
|
|
|
|
+ private static final Pattern p = Pattern.compile("[^0-9-.((]");
|
|
|
|
|
|
public List<Integer> evaluate(String salary) {
|
|
public List<Integer> evaluate(String salary) {
|
|
if (StringUtils.isEmpty(salary) || salary.contains("电话")) {
|
|
if (StringUtils.isEmpty(salary) || salary.contains("电话")) {
|
|
@@ -31,7 +30,7 @@ public class ToIntervalSalary extends UDF {
|
|
if (salary.contains("以下")) {
|
|
if (salary.contains("以下")) {
|
|
returnBig = true;
|
|
returnBig = true;
|
|
}
|
|
}
|
|
- int factor = 1;
|
|
|
|
|
|
+ double factor = 1;
|
|
if (salary.contains("万") || salary.contains("w") || salary.contains("W")) {
|
|
if (salary.contains("万") || salary.contains("w") || salary.contains("W")) {
|
|
factor = 10000;
|
|
factor = 10000;
|
|
}
|
|
}
|
|
@@ -42,29 +41,29 @@ public class ToIntervalSalary extends UDF {
|
|
factor = factor / 12;
|
|
factor = factor / 12;
|
|
}
|
|
}
|
|
salary = p.matcher(salary).replaceAll("");
|
|
salary = p.matcher(salary).replaceAll("");
|
|
- int f = factor;
|
|
|
|
- int[] ints = Arrays.stream(salary.split("[-wWKk万千元]"))
|
|
|
|
|
|
+ double f = factor;
|
|
|
|
+ int[] ints = Arrays.stream(salary.split("[-wWKk万千元((每]"))
|
|
.filter(NumberUtils::isNumber)
|
|
.filter(NumberUtils::isNumber)
|
|
.filter(s -> s.length() < 8)
|
|
.filter(s -> s.length() < 8)
|
|
.mapToDouble(Double::valueOf)
|
|
.mapToDouble(Double::valueOf)
|
|
- .mapToInt(d -> (int) d).map(i -> i * f)
|
|
|
|
|
|
+ .mapToInt(d -> (int) d).map(i -> (int)(i * f))
|
|
.toArray();
|
|
.toArray();
|
|
if (ints.length == 0) {
|
|
if (ints.length == 0) {
|
|
return MIAN_YI;
|
|
return MIAN_YI;
|
|
} else if (ints.length == 1) {
|
|
} else if (ints.length == 1) {
|
|
if (returnSmall) {
|
|
if (returnSmall) {
|
|
- return Lists.newArrayList(ints[0], null);
|
|
|
|
|
|
+ return Arrays.asList(ints[0], null);
|
|
} else if (returnBig) {
|
|
} else if (returnBig) {
|
|
- return Lists.newArrayList(null, ints[0]);
|
|
|
|
|
|
+ return Arrays.asList(null, ints[0]);
|
|
}
|
|
}
|
|
- return Lists.newArrayList(ints[0], ints[0]);
|
|
|
|
|
|
+ return Arrays.asList(ints[0], ints[0]);
|
|
} else if (ints.length == 2) {
|
|
} else if (ints.length == 2) {
|
|
- return Lists.newArrayList(ints[0], ints[1]);
|
|
|
|
|
|
+ return Arrays.asList(ints[0], ints[1]);
|
|
}
|
|
}
|
|
return MIAN_YI;
|
|
return MIAN_YI;
|
|
}
|
|
}
|
|
|
|
|
|
public static void main(String[] args) {
|
|
public static void main(String[] args) {
|
|
- System.out.println(new ToIntervalSalary().evaluate("6-10万\n"));
|
|
|
|
|
|
+ System.out.println(new ToIntervalSalary().evaluate("100000年薪\n"));
|
|
}
|
|
}
|
|
}
|
|
}
|