许家凯 3 years ago
parent
commit
7672d8eba4

+ 13 - 0
pom.xml

@@ -204,6 +204,19 @@
             <version>5.5.8</version>
         </dependency>
 
+        <dependency>
+            <groupId>com.hankcs</groupId>
+            <artifactId>hanlp</artifactId>
+            <version>portable-1.8.2</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/com.github.stuxuhai/jpinyin -->
+        <dependency>
+            <groupId>com.github.stuxuhai</groupId>
+            <artifactId>jpinyin</artifactId>
+            <version>1.1.8</version>
+        </dependency>
+
 <!--        <dependency>-->
 <!--            <groupId>com.alibaba.otter</groupId>-->
 <!--            <artifactId>canal.protocol</artifactId>-->

+ 10 - 0
src/main/java/com/winhc/phoenix/example/controller/SearchController.java

@@ -47,6 +47,16 @@ public class SearchController {
         return map.get(version.getValue()).query(content, from, size, new HashSet<>(), sortType);
     }
 
+    @Timer
+    @ApiOperation(value = "搜索提示")
+    @GetMapping("tips")
+    public Object tips(String content
+            , @RequestParam(defaultValue = "v9") EsVersion version
+    ) {
+        return map.get(version.getValue()).tips(content);
+    }
+
+
 
     @Timer
     @ApiOperation(value = "找关系搜索")

+ 14 - 3
src/main/java/com/winhc/phoenix/example/service/impl/SearchV9ServiceImpl.java

@@ -7,10 +7,12 @@ import com.winhc.phoenix.example.enums.CompanySearchSortType;
 import com.winhc.phoenix.example.service.SearchService;
 import com.winhc.phoenix.example.util.company.search.CompanyQueryVo;
 import com.winhc.phoenix.example.util.company.search.CompanySearchQueryUtils;
+import com.winhc.phoenix.example.util.company.search.CompanySearchTipsUtils;
 import lombok.AllArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.search.rescore.RescoreBuilder;
+import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
+import org.elasticsearch.search.rescore.QueryRescorerBuilder;
 import org.elasticsearch.search.sort.FieldSortBuilder;
 import org.elasticsearch.search.sort.SortBuilders;
 import org.elasticsearch.search.sort.SortOrder;
@@ -33,11 +35,20 @@ public class SearchV9ServiceImpl implements SearchService {
 
     private SearchDao searchDao;
     public static final String index = "winhc-company-v9_1";
+//    public static final String index = "winhc_index_rt_company";
     public static final String type = "company";
 
+    private static final String[] includesTips = new String[]{"cname", "legal_entity*", "estiblish_time", "reg_status_std", "company_type","logo", "new_cid"}; // field patterns handed to fetchSourceContextTips as its includes ("legal_entity*" is a wildcard pattern)
+    private static final FetchSourceContext fetchSourceContextTips = new FetchSourceContext(true, includesTips, null); // passed to searchDao.search by tips(); built with includesTips as includes and null as excludes
+
+
     @Override
     public Object tips(String s) {
-        return null;
+        // Suggestion lookup: recall query and rescorers both come from CompanySearchTipsUtils.
+        // The MD5 of the input is passed in the preference position, and the search is
+        // issued with 0 and 10 in the from/size positions and fetchSourceContextTips
+        // as the source filter.
+        QueryBuilder recallQuery = CompanySearchTipsUtils.tips(s);
+        String preference = SecureUtil.md5(s);
+        List<QueryRescorerBuilder> rescorers = CompanySearchTipsUtils.getReScoreBuilder(s);
+        return searchDao.search(index, type, recallQuery, rescorers, null, fetchSourceContextTips, 0, 10, preference);
     }
 
     @Override
@@ -74,7 +85,7 @@ public class SearchV9ServiceImpl implements SearchService {
             Object search = searchDao.search(index, type, queryBuilder, null, order, null, from, size, preference);
             return search;
         } else {
-            List<RescoreBuilder> reScoreBuilder = CompanySearchQueryUtils.getReScoreBuilder(companyQueryVo);
+            List<QueryRescorerBuilder> reScoreBuilder = CompanySearchQueryUtils.getReScoreBuilder(companyQueryVo);
             Object search = searchDao.search(index, type, queryBuilder, reScoreBuilder, null, null, from, size, preference);
             return search;
         }

+ 22 - 0
src/main/java/com/winhc/phoenix/example/util/company/search/CompanyIndexUtils.java

@@ -0,0 +1,22 @@
+package com.winhc.phoenix.example.util.company.search;
+
+import com.github.stuxuhai.jpinyin.ChineseHelper;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.regex.Pattern;
+
+/**
+ * Static text-normalisation helpers shared by the company search query builders.
+ *
+ * @author: XuJiakai
+ * 2021/12/31 11:15
+ */
+public final class CompanyIndexUtils {
+    /** Matches any character that is not in U+4E00..U+9FA5, an ASCII digit or an ASCII letter. */
+    private static final Pattern NON_INDEXABLE_CHARS = Pattern.compile("[^\\u4e00-\\u9fa50-9a-zA-Z]");
+
+    private CompanyIndexUtils() {
+        // static helpers only; never instantiated
+    }
+
+    /**
+     * Converts {@code val} with {@code ChineseHelper.convertToSimplifiedChinese}.
+     *
+     * @param val text to convert; may be {@code null}
+     * @return the converted text, or {@code null} when {@code val} is {@code null} or empty
+     *         (note the difference from {@link #cleanup(String)}, which returns {@code ""})
+     */
+    public static String convertToSimplifiedChinese(String val) {
+        if (StringUtils.isEmpty(val)) {
+            return null;
+        }
+        return ChineseHelper.convertToSimplifiedChinese(val);
+    }
+
+    /**
+     * Removes every character that is not in U+4E00..U+9FA5, an ASCII digit or an ASCII letter.
+     *
+     * @param val text to clean; may be {@code null}
+     * @return the cleaned text, or {@code ""} when {@code val} is {@code null}, empty or whitespace-only
+     */
+    public static String cleanup(String val) {
+        if (StringUtils.isBlank(val)) {
+            return "";
+        }
+        return NON_INDEXABLE_CHARS.matcher(val).replaceAll("");
+    }
+}

+ 76 - 28
src/main/java/com/winhc/phoenix/example/util/company/search/CompanySearchQueryUtils.java

@@ -1,12 +1,14 @@
 package com.winhc.phoenix.example.util.company.search;
 
+import org.apache.commons.lang3.StringUtils;
+import org.elasticsearch.common.lucene.search.function.CombineFunction;
 import org.elasticsearch.index.query.*;
+import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
 import org.elasticsearch.index.query.functionscore.ScriptScoreFunctionBuilder;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptType;
 import org.elasticsearch.search.rescore.QueryRescoreMode;
 import org.elasticsearch.search.rescore.QueryRescorerBuilder;
-import org.elasticsearch.search.rescore.RescoreBuilder;
 
 import java.util.*;
 
@@ -25,7 +27,9 @@ public class CompanySearchQueryUtils {
         //以下为过滤逻辑
         BoolQueryBuilder returnBoolQuery = boolQuery()
                 .filter(termQuery("deleted", "0"))
-                .filter(rangeQuery("company_score_weight").gt(0.3F))
+                .filter(boolQuery().should(rangeQuery("company_score_weight").gt(0.3F))
+                        .should(termsQuery("company_type", "2"))
+                )
                 .must(boolQuery);
 
         Optional<List<Integer>> searchTypeList = Optional.ofNullable(companyQueryVo.getSearchTypeList());
@@ -56,14 +60,31 @@ public class CompanySearchQueryUtils {
         return returnBoolQuery;
     }
 
-    public static List<RescoreBuilder> getReScoreBuilder(CompanyQueryVo companyQueryVo) {
+    /**
+     * Wraps {@code query} in a function-score query that applies two stored scripts,
+     * {@code company-name-term-score_v2} and {@code company-search-script_v2}, with
+     * boost mode {@code MULTIPLY}.
+     *
+     * @param query          query to wrap
+     * @param companyQueryVo supplies the cleaned-up search text passed to the scripts as {@code query_content}
+     * @return the function-score query
+     */
+    public static QueryBuilder addScoreFunction(QueryBuilder query, CompanyQueryVo companyQueryVo) {
+        // NOTE(review): getReScoreBuilder converts the cleaned-up content to simplified
+        // Chinese before handing it to the same scripts; this method does not — confirm
+        // that the difference is intended.
+        // Plain HashMap instead of double-brace initialisation, which would create an
+        // anonymous HashMap subclass for every call site.
+        Map<String, Object> params = new HashMap<>(2);
+        params.put("query_content", companyQueryVo.getCleanupContent());
+        params.put("der", 0.85);
+
+        // Both functions share the same parameter map.
+        FunctionScoreQueryBuilder.FilterFunctionBuilder[] functions = new FunctionScoreQueryBuilder.FilterFunctionBuilder[]{
+                new FunctionScoreQueryBuilder.FilterFunctionBuilder(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-name-term-score_v2", params))),
+                new FunctionScoreQueryBuilder.FilterFunctionBuilder(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-search-script_v2", params)))
+        };
+
+        return new FunctionScoreQueryBuilder(query, functions).boostMode(CombineFunction.MULTIPLY);
+    }
+
+    public static List<QueryRescorerBuilder> getReScoreBuilder(CompanyQueryVo companyQueryVo) {
+        String content = CompanyIndexUtils.convertToSimplifiedChinese(companyQueryVo.getCleanupContent());
+
 
         Map<String, Object> map = new HashMap<String, Object>(2) {{
             put("query_content", content);
             put("der", 0.85);
         }};
-        List<RescoreBuilder> list = new ArrayList<>();
+        List<QueryRescorerBuilder> list = new ArrayList<>();
 
         //加上名称全匹配分数
         list.add(new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-name-term-score_v2", map))))
@@ -96,12 +117,16 @@ public class CompanySearchQueryUtils {
             );
 
 
+            boolQuery.should(termQuery("cname.value", content).boost(0));
             boolQuery.should(termQuery("org_number", org_content.toUpperCase()).boost(1000));
             boolQuery.should(termQuery("credit_code", org_content.toUpperCase()).boost(1000));
             boolQuery.should(termQuery("reg_number", org_content.toUpperCase()).boost(1000));
             boolQuery.should(termQuery("icp_domain.keyword", org_content).boost(1000));
             boolQuery.should(termQuery("emails.keyword", org_content).boost(1000));
-            boolQuery.should(termQuery("phones", org_content).boost(1000));
+            boolQuery.should(disMaxQuery()
+                    .add(termQuery("phones.keyword", org_content).boost(1000))
+                    .add(matchQuery("phones", org_content).boost(1000))
+            );
             boolQuery.should(termQuery("reg_location.keyword", org_content).boost(1000));
         }
 
@@ -147,30 +172,52 @@ public class CompanySearchQueryUtils {
                 .tieBreaker(0.4F)
         );
 
+        DisMaxQueryBuilder add = disMaxQuery().add(disMaxQuery()
+                .add(disMaxQuery()
+                        .add(matchPhraseQuery("cname.show.pinyin", content))
+                        .add(matchPhraseQuery("history_name.show.pinyin", content))
+                )
+                .add(multiMatchQuery(content)
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .minimumShouldMatch("5<90%")
+                        .tieBreaker(0.3F)
+
+                        .field("cname.show", 16)
+                        .field("history_name.show", 12))
+                .add(multiMatchQuery(content)
+                        .operator(Operator.AND)
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .tieBreaker(0.3F)
+                        .field("cname.show.standard", 16)
+                        .field("history_name.show.standard", 12))
+
+                .tieBreaker(0.4F));
+
+        String simplifiedChinese = CompanyIndexUtils.convertToSimplifiedChinese(org_content);
+        if (StringUtils.isNotBlank(simplifiedChinese)) {
+            //添加繁体字简化查询
+            add.add(disMaxQuery()
+                    .add(disMaxQuery()
+                            .add(matchPhraseQuery("cname.simplified_chinese.pinyin", simplifiedChinese))
+                            .add(matchPhraseQuery("history_name.show.pinyin", simplifiedChinese))
+                    )
+                    .add(multiMatchQuery(simplifiedChinese)
+                            .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                            .minimumShouldMatch("5<90%")
+                            .tieBreaker(0.3F)
+
+                            .field("cname.simplified_chinese", 16)
+                            .field("history_name.simplified_chinese", 12))
+                    .add(multiMatchQuery(simplifiedChinese)
+                            .operator(Operator.AND)
+                            .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                            .tieBreaker(0.3F)
+                            .field("cname.simplified_chinese.standard", 16)
+                            .field("history_name.simplified_chinese.standard", 12))
+                    .tieBreaker(0.4F));
+        }
 
-        boolQuery.should(
-                disMaxQuery()
-                        .add(disMaxQuery()
-                                .add(matchPhraseQuery("cname.show.pinyin", content))
-                                .add(matchPhraseQuery("history_name.show.pinyin", content))
-                        )
-                        .add(multiMatchQuery(content)
-                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                                .minimumShouldMatch("5<90%")
-                                .tieBreaker(0.3F)
-
-                                .field("cname.show", 16)
-                                .field("history_name.show", 12))
-                        .add(multiMatchQuery(content)
-                                .operator(Operator.AND)
-                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                                .tieBreaker(0.3F)
-                                .field("cname.show.standard", 16)
-                                .field("history_name.show.standard", 12))
-
-                        .tieBreaker(0.4F)
-        );
-
+        boolQuery.should(add);
         return boolQuery;
     }
 
@@ -196,4 +243,5 @@ public class CompanySearchQueryUtils {
         return spanNearQueryBuilder;
     }
 
+
 }

+ 170 - 0
src/main/java/com/winhc/phoenix/example/util/company/search/CompanySearchTipsUtils.java

@@ -0,0 +1,170 @@
+package com.winhc.phoenix.example.util.company.search;
+
+import org.apache.commons.lang3.StringUtils;
+import org.elasticsearch.index.query.*;
+import org.elasticsearch.index.query.functionscore.ScriptScoreFunctionBuilder;
+import org.elasticsearch.script.Script;
+import org.elasticsearch.script.ScriptType;
+import org.elasticsearch.search.rescore.QueryRescoreMode;
+import org.elasticsearch.search.rescore.QueryRescorerBuilder;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.index.query.QueryBuilders.*;
+
+/**
+ * Builds the Elasticsearch recall query and the rescorers used for company
+ * search suggestions ("tips").
+ *
+ * @author: XuJiakai
+ * 2021/12/31 11:01
+ */
+public final class CompanySearchTipsUtils {
+
+    private CompanySearchTipsUtils() {
+        // static helpers only; never instantiated
+    }
+
+    /**
+     * Builds the complete suggestion query: the scoring clauses from
+     * {@link #getBoolQuery(String)} as the {@code must} clause, restricted by filters.
+     *
+     * @param content raw search text
+     * @return the filtered bool query
+     */
+    public static QueryBuilder tips(String content) {
+        BoolQueryBuilder scoringQuery = getBoolQuery(content);
+
+        // Filter logic: deleted must be "0", and the document must either have
+        // company_score_weight > 0.3 or company_type "2".
+        return boolQuery()
+                .filter(termQuery("deleted", "0"))
+                .filter(boolQuery().should(rangeQuery("company_score_weight").gt(0.3F))
+                        .should(termsQuery("company_type", "2"))
+                )
+                .must(scoringQuery);
+    }
+
+    /**
+     * Assembles the scoring part of the suggestion query as {@code should} clauses.
+     * Some clauses use the text as given, others the text after
+     * {@link CompanyIndexUtils#cleanup(String)}; the identifier/contact clauses are
+     * only added when the cleaned text is longer than 3 characters.
+     *
+     * @param c raw search text
+     * @return bool query holding only {@code should} clauses
+     */
+    private static BoolQueryBuilder getBoolQuery(String c) {
+        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
+
+        String rawContent = c;
+        String cleanContent = CompanyIndexUtils.cleanup(c);
+
+        if (cleanContent.length() > 3) {
+            String upperRaw = rawContent.toUpperCase();
+
+            boolQuery.should(disMaxQuery()
+                    .add(termQuery("history_name.show.keyword", rawContent))
+                    .add(termQuery("history_name.value", cleanContent))
+            );
+
+            boolQuery.should(termQuery("cname.value", cleanContent).boost(0));
+            boolQuery.should(termQuery("org_number", upperRaw).boost(1000));
+            boolQuery.should(termQuery("credit_code", upperRaw).boost(1000));
+            boolQuery.should(termQuery("reg_number", upperRaw).boost(1000));
+            boolQuery.should(termQuery("emails.keyword", rawContent).boost(1000));
+            boolQuery.should(disMaxQuery()
+                    .add(termQuery("phones.keyword", rawContent).boost(1000))
+                    .add(matchQuery("phones", rawContent).boost(1000))
+            );
+        }
+
+        // Person names: exact keyword matches only.
+        boolQuery.should(disMaxQuery()
+                .add(disMaxQuery()
+                        .add(termQuery("legal_entity_name.keyword", rawContent).boost(10))
+                        .add(termQuery("holder.name.keyword", cleanContent).boost(10F))
+                        .add(termQuery("holder_history.name.keyword", cleanContent).boost(10F))
+                        .add(termQuery("staff.name.keyword", cleanContent).boost(5.5F))
+                        .add(termQuery("staff_history.name.keyword", cleanContent).boost(5.5F))
+                        .tieBreaker(0.3F))
+                .tieBreaker(0.3F)
+        );
+
+        // ICP, app and trademark fields: exact keyword matches plus phrase matches.
+        boolQuery.should(disMaxQuery()
+                .add(disMaxQuery()
+                        .add(termQuery("icp.keyword", rawContent).boost(20))
+                        .add(termQuery("app_info.keyword", rawContent).boost(40))
+                        .add(termQuery("company_tm.keyword", rawContent).boost(20))
+                        .tieBreaker(0.4F))
+                .add(disMaxQuery()
+                        .add(matchPhraseQuery("icp", cleanContent).boost(8).slop(3))
+                        .add(matchPhraseQuery("app_info", cleanContent).boost(19).slop(3))
+                        .add(matchPhraseQuery("company_tm", rawContent).boost(7).slop(1))
+                        .tieBreaker(0.3F))
+                .tieBreaker(0.4F)
+        );
+
+        // E-mail and ICP domain: every term of the input must match.
+        boolQuery.should(disMaxQuery()
+                .add(disMaxQuery()
+                        .add(matchQuery("emails", rawContent).boost(7).minimumShouldMatch("100%"))
+                        .add(matchQuery("icp_domain", rawContent).boost(1000).minimumShouldMatch("100%"))
+                        .tieBreaker(0.3F))
+                .tieBreaker(0.4F)
+        );
+
+        // Company name (current and historical). The pinyin and ".standard" variants
+        // queried by CompanySearchQueryUtils are not queried here.
+        DisMaxQueryBuilder nameQuery = disMaxQuery().add(disMaxQuery()
+                .add(multiMatchQuery(cleanContent)
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .minimumShouldMatch("5<90%")
+                        .tieBreaker(0.3F)
+                        .field("cname.show", 16)
+                        .field("history_name.show", 12))
+                .tieBreaker(0.4F));
+
+        String simplifiedChinese = CompanyIndexUtils.convertToSimplifiedChinese(rawContent);
+        if (StringUtils.isNotBlank(simplifiedChinese)) {
+            // Also query the simplified-Chinese name fields with the converted text.
+            nameQuery.add(disMaxQuery()
+                    .add(multiMatchQuery(simplifiedChinese)
+                            .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                            .minimumShouldMatch("5<90%")
+                            .tieBreaker(0.3F)
+                            .field("cname.simplified_chinese", 16)
+                            .field("history_name.simplified_chinese", 12))
+                    .tieBreaker(0.4F));
+        }
+
+        boolQuery.should(nameQuery);
+        return boolQuery;
+    }
+
+    /**
+     * Builds the two rescorers applied to suggestion results, each over a window of 50.
+     * Both stored scripts receive the same parameters: {@code query_content} (the input
+     * after cleanup and simplified-Chinese conversion; {@code null} when the cleaned
+     * text is empty) and {@code der} (0.85).
+     *
+     * @param content raw search text
+     * @return rescorers in application order
+     */
+    public static List<QueryRescorerBuilder> getReScoreBuilder(String content) {
+        // Plain HashMap instead of double-brace initialisation, which would create an
+        // anonymous HashMap subclass.
+        Map<String, Object> params = new HashMap<>(2);
+        params.put("query_content", CompanyIndexUtils.convertToSimplifiedChinese(CompanyIndexUtils.cleanup(content)));
+        params.put("der", 0.85);
+
+        List<QueryRescorerBuilder> list = new ArrayList<>();
+
+        // Add the full-name-match score.
+        list.add(new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-name-term-score_v2", params))))
+                .windowSize(50)
+                .setScoreMode(QueryRescoreMode.Total));
+
+        // Multiply by the weight score.
+        list.add(new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-search-script_v2", params))))
+                .windowSize(50)
+                .setScoreMode(QueryRescoreMode.Multiply));
+        return list;
+    }
+
+}

+ 4 - 1
src/main/resources/static/search-company.html

@@ -184,7 +184,10 @@
         for (let mKey in m) {
             if (value[mKey]) {
                 let s = value[mKey].map(function (v, i, a) {
-                    v = v.replaceAll("</font>)<font class='my-bold'>", ")").replaceAll("</font>(<font class='my-bold'>", "(")
+                    v = v.replaceAll(")", ")").replaceAll("(", "(");
+                    if (v.indexOf("</font>)<font class='my-bold'>") > 0 && v.indexOf("</font>(<font class='my-bold'>") > 0) {
+                        v = v.replaceAll("</font>)<font class='my-bold'>", ")").replaceAll("</font>(<font class='my-bold'>", "(")
+                    }
                     return '<span class="my-badge">' + v + '</span>'
                 }).join(',')
                 htm += '<p>' + m[mKey] + ':' + s + '</p>'