EfficaciousStr.java 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. package com.winhc.bigdata.udf.string;
  2. import com.aliyun.odps.udf.UDF;
  3. import com.aliyun.odps.utils.StringUtils;
  4. import java.util.regex.Pattern;
  5. /**
  6. * @author: XuJiakai
  7. * 2021/8/23 15:17
  8. */
  9. public class EfficaciousStr extends UDF {
  10. private static final Pattern pattern = Pattern.compile("^[\\u4e00-\\u9fa50-9a-zA-Z ]+$");
  11. private static final Pattern num_pat = Pattern.compile("^[0-9 ]+$");
  12. private static final Pattern person_pat = Pattern.compile("^[\\u4e00-\\u9fa5a-zA-Z ]+$");
  13. private static final Pattern eng_pat = Pattern.compile("^[a-zA-Z ]+$");
  14. public static final String NAME = "公司名";
  15. public static final String PERSON = "人名";
  16. public static final String APP = "品牌";
  17. public Boolean evaluate(String label, String str) {
  18. if (StringUtils.isEmpty(str)) {
  19. return false;
  20. }
  21. if ((NAME.equals(label) || PERSON.equals(label)) && num_pat.matcher(str).matches()) {
  22. return false;
  23. }
  24. switch (label) {
  25. case NAME:
  26. return person_pat.matcher(str).matches();
  27. case PERSON:
  28. if(eng_pat.matcher(str).matches()){
  29. return false;
  30. }
  31. return person_pat.matcher(str).matches();
  32. case APP:
  33. return pattern.matcher(str).matches();
  34. default:
  35. throw new RuntimeException("label is not define," + label);
  36. }
  37. }
  38. }