Browse Source

fix: 修复startOffset顺序问题

许家凯 3 years ago
parent
commit
bf43435e60

+ 1 - 1
src/main/java/cn/winhc/elasticsearch/plugin/configuration/AnalyzeContext.java

@@ -47,7 +47,6 @@ public class AnalyzeContext {
     private Text2Tokenizer text2Tokenizer;
 
 
-
     /**
      * 句子分隔符
      */
@@ -98,6 +97,7 @@ public class AnalyzeContext {
         if (termList == null || termList.isEmpty()) {
             return null;
         }
+        termList.sort(Term::compareTo);
 
         for (Term term : termList) {
             term.setOffset(term.getOffset() + offset);

+ 7 - 1
src/main/java/cn/winhc/elasticsearch/plugin/entity/Term.java

@@ -5,7 +5,7 @@ package cn.winhc.elasticsearch.plugin.entity;
  * 2021/11/8 09:03
  */
 
-public class Term {
+public class Term implements Comparable<Term> {
     //词元的起始位移
     private int offset;
     //词元的相对起始位置
@@ -86,4 +86,10 @@ public class Term {
         result = 31 * result + (text != null ? text.hashCode() : 0);
         return result;
     }
+
+    @Override
+    public int compareTo(Term o) { // Natural order: ascending offset, ties broken by ascending end.
+        int compare = Integer.compare(this.offset, o.offset); // primary key: start offset
+        return compare == 0 ? Integer.compare(this.end, o.end) : compare; // equal offsets: smaller end sorts first
+    } // NOTE(review): text is ignored here but hashed in hashCode(); order may be inconsistent with equals - confirm.
 }

+ 2 - 1
src/main/java/cn/winhc/elasticsearch/plugin/handle/EmailText2Tokenizer.java

@@ -91,7 +91,8 @@ public class EmailText2Tokenizer implements Text2Tokenizer {
 
     public static void main(String[] args) {
         EmailText2Tokenizer emailText2Tokenizer = new EmailText2Tokenizer();
-        List<Term> textTokenizer = emailText2Tokenizer.getTextTokenizer("xjk@abc.topme.pro你好吗xu-jk@qq.com");
+        List<Term> textTokenizer = emailText2Tokenizer.getTextTokenizer("ms.nygs@vip.163.com");
+        textTokenizer.sort(Term::compareTo);
         System.out.println(textTokenizer);
     }
 }

+ 1 - 1
src/main/java/cn/winhc/elasticsearch/plugin/util/DomainUtils.java

@@ -38,7 +38,7 @@ public class DomainUtils {
     }
 
     public static void main(String[] args) {
-        System.out.println(getEmails("13729958080@163.com"));
+        System.out.println(getEmails("ms.nygs@vip.163.com"));
         System.out.println(getDomain("sub.163.com"));
     }
 }