Преглед на файлове

feat: 添加查找繁体转简体异常问题

许家凯 преди 2 години
родител
ревизия
3eb88739dc
променени са 2 файла, в които са добавени 49 реда и са изтрити 3 реда
  1. 15 3
      pom.xml
  2. 34 0
      src/main/java/com/winhc/bigdata/udf/string/FindTraditional2SimpleError.java

+ 15 - 3
pom.xml

@@ -69,6 +69,18 @@
             <version>5.6.6</version>
         </dependency>
 
+        <dependency>
+            <groupId>com.github.houbb</groupId>
+            <artifactId>opencc4j</artifactId>
+            <version>1.7.2</version>
+        </dependency>
+        <!-- https://mvnrepository.com/artifact/com.github.stuxuhai/jpinyin -->
+        <dependency>
+            <groupId>com.github.stuxuhai</groupId>
+            <artifactId>jpinyin</artifactId>
+            <version>1.1.8</version>
+        </dependency>
+
     </dependencies>
 
     <build>
@@ -83,9 +95,9 @@
                         <descriptor>src/main/resources/package.xml</descriptor>
                     </descriptors>
 
-                   <!-- <descriptorRefs>
-                        <descriptorRef>jar-with-dependencies</descriptorRef>
-                    </descriptorRefs>-->
+                    <!-- <descriptorRefs>
+                         <descriptorRef>jar-with-dependencies</descriptorRef>
+                     </descriptorRefs>-->
                 </configuration>
                 <executions>
                     <execution>

+ 34 - 0
src/main/java/com/winhc/bigdata/udf/string/FindTraditional2SimpleError.java

@@ -0,0 +1,34 @@
+package com.winhc.bigdata.udf.string;
+
+import com.aliyun.odps.udf.UDF;
+import com.aliyun.odps.utils.StringUtils;
+import com.github.houbb.opencc4j.util.ZhConverterUtil;
+import com.github.stuxuhai.jpinyin.ChineseHelper;
+
+/**
+ * @author: XuJiakai
+ * 2022/3/4 10:31
+ */
+public class FindTraditional2SimpleError extends UDF {
+    public Boolean evaluate(String val) {
+        if (StringUtils.isEmpty(val)) {
+            return true;
+        }
+
+        String oldVal = ChineseHelper.convertToSimplifiedChinese(val);
+        String newVal = ZhConverterUtil.toSimple(val);
+        return oldVal.equals(newVal);
+    }
+
+
+    public static void main(String[] args) {
+        FindTraditional2SimpleError simple = new FindTraditional2SimpleError();
+        System.out.println(simple.evaluate("滙"));
+        System.out.println(simple.evaluate("匯"));
+        System.out.println(simple.evaluate("彙"));
+        System.out.println(simple.evaluate("恆"));
+        System.out.println(simple.evaluate("文徵明"));
+        System.out.println(simple.evaluate("徵信"));
+        System.out.println(simple.evaluate("瞭望"));
+    }
+}