xufei преди 2 години
родител
ревизия
f903f5eb12
променен е 1 файл, в който са добавени 12 реда и са изтрити 4 реда
  1. 12 4
      src/main/java/com/winhc/bigdata/udf/etl/RegisteredCapitalFormat.java

+ 12 - 4
src/main/java/com/winhc/bigdata/udf/etl/RegisteredCapitalFormat.java

@@ -13,7 +13,8 @@ import java.util.regex.Pattern;
  * Ps. 800.000000万元 -> 800万元
  */
 public class RegisteredCapitalFormat extends UDF {
-    private static final Pattern pattern = Pattern.compile("[^\\x00-\\x7F]+");
+    //private static final Pattern pattern = Pattern.compile("([^\\x00-\\x7F]+|[\\s]+)");
+    private static final Pattern pattern = Pattern.compile("[^0-9.]+"); // matches every run of characters that is neither a digit nor '.'; inside a character class the dot is already literal, so no backslash escape is needed (the old "\\\\." also exempted literal backslashes from removal)
     DecimalFormat decimalFormat = new DecimalFormat("0.###########");
 
     public String evaluate(String content) {
@@ -21,8 +22,10 @@ public class RegisteredCapitalFormat extends UDF {
             return null;
         }
         try {
-            String prefix = pattern.matcher(content).replaceAll("");
-            String unit = content.replace(prefix, "");
+            String prefix = pattern.matcher(content).replaceAll("")
+                    .replaceAll("\\s+", "");
+            String unit = content.replace(prefix, "")
+                    .replaceAll("\\s+", "");
             double v = Double.parseDouble(prefix);
             return decimalFormat.format(v) + unit;
         } catch (Exception e) {
@@ -32,7 +35,12 @@ public class RegisteredCapitalFormat extends UDF {
 
     public static void main(String[] args) {
         RegisteredCapitalFormat registeredCapitalFormat = new RegisteredCapitalFormat();
-        String evaluate = registeredCapitalFormat.evaluate("0.00000美元");
+        String s = "0.0000 万元 人民币"; // active sample: zero amount followed by whitespace-separated unit text
+        //String s = "10 万元";
+        //String s = "10 万元 人民币";
+        //String s = "10.00100 万元 人民币";
+        //String s = "5000.000000万";
+        String evaluate = registeredCapitalFormat.evaluate(s); // run the UDF once on the sample; the result is printed below
         System.out.println(evaluate);
     }
 }