|
@@ -1,9 +1,11 @@
|
|
|
package com.winhc.phoenix.example.service.impl;
|
|
|
|
|
|
+import cn.hutool.crypto.SecureUtil;
|
|
|
import com.winhc.phoenix.example.dao.SearchDao;
|
|
|
import com.winhc.phoenix.example.enums.CompanyQueryType;
|
|
|
import com.winhc.phoenix.example.enums.CompanySearchSortType;
|
|
|
import com.winhc.phoenix.example.service.SearchService;
|
|
|
+import com.winhc.phoenix.example.util.CompanyNameUtils;
|
|
|
import lombok.AllArgsConstructor;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.elasticsearch.index.query.*;
|
|
@@ -17,7 +19,6 @@ import org.elasticsearch.search.rescore.RescoreBuilder;
|
|
|
import org.elasticsearch.search.sort.FieldSortBuilder;
|
|
|
import org.elasticsearch.search.sort.SortBuilders;
|
|
|
import org.elasticsearch.search.sort.SortOrder;
|
|
|
-import org.springframework.context.annotation.Primary;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
import java.util.*;
|
|
@@ -30,7 +31,7 @@ import static org.elasticsearch.index.query.QueryBuilders.*;
|
|
|
* 2020/11/19 14:54
|
|
|
*/
|
|
|
@Slf4j
|
|
|
-@Primary
|
|
|
+//@Primary
|
|
|
@Service(value = "v8_fast")
|
|
|
@AllArgsConstructor
|
|
|
public class SearchV8FastServiceImpl implements SearchService {
|
|
@@ -38,7 +39,7 @@ public class SearchV8FastServiceImpl implements SearchService {
|
|
|
|
|
|
// public static final String index = "winhc-company-v8";
|
|
|
// public static final String index = "winhc-company-v8_3";
|
|
|
- public static final String index = "winhc-company-v8_4";
|
|
|
+ public static final String index = "winhc-company-v8_5";
|
|
|
public static final String type = "company";
|
|
|
private static final String[] includes = new String[]{"cname", "legal_entity*", "estiblish_time", "reg_status_std", "company_type", "province_code", "reg_capital", "logo", "new_cid"};
|
|
|
private static final FetchSourceContext fetchSourceContext = new FetchSourceContext(true, includes, null);
|
|
@@ -73,15 +74,17 @@ public class SearchV8FastServiceImpl implements SearchService {
|
|
|
put("der", 0.85);
|
|
|
}};
|
|
|
|
|
|
+ // Add the score for an exact (full) company-name match
|
|
|
QueryRescorerBuilder rescorerBuilder_0 = new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-name-term-score", map))))
|
|
|
.windowSize(100)
|
|
|
.setScoreMode(QueryRescoreMode.Total);
|
|
|
|
|
|
+ // Multiply by the weight score
|
|
|
QueryRescorerBuilder rescorerBuilder = new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-search-script_v2", map))))
|
|
|
.windowSize(100)
|
|
|
.setScoreMode(QueryRescoreMode.Multiply);
|
|
|
|
|
|
-
|
|
|
+ // Add a small bonus for intellectual-property matches
|
|
|
QueryRescorerBuilder rescorerBuilder2 = new QueryRescorerBuilder(getRescorerBool(content))
|
|
|
.windowSize(50)
|
|
|
.setScoreMode(QueryRescoreMode.Total);
|
|
@@ -94,26 +97,27 @@ public class SearchV8FastServiceImpl implements SearchService {
|
|
|
/**
 * Runs a company search against {@code index}/{@code type} and returns whatever
 * {@code searchDao.search} returns.
 *
 * <p>The recall query is always built by {@code getBoolQuery(content, set)}. What
 * happens next depends on {@code sortType}:
 * <ul>
 *   <li>For the four explicit sort types, results are ordered by
 *       {@code reg_capital_amount} or {@code estiblish_time} (ascending or
 *       descending) and no rescorers are passed.</li>
 *   <li>For any other value (including {@code null}), no field sort is passed and
 *       the rescorers from {@code getReScoreBuilder} are applied instead.</li>
 * </ul>
 *
 * @param content  raw search text; also hashed with MD5 to form the {@code preference} argument
 * @param from     forwarded unchanged to {@code searchDao.search} (presumably the paging offset; confirm in SearchDao)
 * @param size     forwarded unchanged to {@code searchDao.search} (presumably the page size; confirm in SearchDao)
 * @param set      query types forwarded to {@code getBoolQuery}
 * @param sortType requested ordering; selects between field sorting and rescoring
 * @return the object returned by {@code searchDao.search}
 */
public Object query(String content, int from, int size, Set<CompanyQueryType> set, CompanySearchSortType sortType) {
|
|
|
// Recall phase: build the bool query that selects candidate documents
|
|
|
BoolQueryBuilder boolQuery = getBoolQuery(content, set);
|
|
|
// The same content always yields the same preference string.
// NOTE(review): presumably used as the Elasticsearch search preference so that
// identical queries hit the same shard copies; confirm in SearchDao.
+ String preference = SecureUtil.md5(content);
|
|
|
|
|
|
if (sortType == CompanySearchSortType.注册资本_从低到高) {
|
|
|
FieldSortBuilder order = SortBuilders.fieldSort("reg_capital_amount").order(SortOrder.ASC);
|
|
|
- Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
|
|
|
+ Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size, preference);
|
|
|
return search;
|
|
|
} else if (sortType == CompanySearchSortType.注册资本_从高到底) {
|
|
|
FieldSortBuilder order = SortBuilders.fieldSort("reg_capital_amount").order(SortOrder.DESC);
|
|
|
- Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
|
|
|
+ Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size, preference);
|
|
|
return search;
|
|
|
} else if (sortType == CompanySearchSortType.成立日期_从早到晚) {
|
|
|
FieldSortBuilder order = SortBuilders.fieldSort("estiblish_time").order(SortOrder.ASC);
|
|
|
- Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
|
|
|
+ Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size, preference);
|
|
|
return search;
|
|
|
} else if (sortType == CompanySearchSortType.成立日期_从晚到早) {
|
|
|
FieldSortBuilder order = SortBuilders.fieldSort("estiblish_time").order(SortOrder.DESC);
|
|
|
- Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
|
|
|
+ Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size, preference);
|
|
|
return search;
|
|
|
} else {
|
|
|
// No explicit field sort: rescorers are passed to the search in place of a sort.
// The new version builds the rescorers from the cleaned-up content rather than the raw text.
- List<RescoreBuilder> reScoreBuilder = getReScoreBuilder(content);
|
|
|
- Object search = searchDao.search(index, type, boolQuery, reScoreBuilder, null, null, from, size);
|
|
|
+ List<RescoreBuilder> reScoreBuilder = getReScoreBuilder(CompanyNameUtils.cleanup(content));
|
|
|
+ Object search = searchDao.search(index, type, boolQuery, reScoreBuilder, null, null, from, size, preference);
|
|
|
return search;
|
|
|
}
|
|
|
}
|
|
@@ -139,48 +143,61 @@ public class SearchV8FastServiceImpl implements SearchService {
|
|
|
private BoolQueryBuilder getBoolQuery(String content, Set<CompanyQueryType> set) {
|
|
|
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
|
|
|
|
|
|
-// boolQuery.should(termQuery("cname.value.keyword", content).boost(1000));
|
|
|
+ String org_content = content;
|
|
|
+ content = CompanyNameUtils.cleanup(content);
|
|
|
+
|
|
|
if (content.length() > 3) {
|
|
|
boolQuery.should(termQuery("history_name.value.keyword", content).boost(1000));
|
|
|
+ boolQuery.should(termQuery("org_number", org_content.toUpperCase()).boost(1000));
|
|
|
+ boolQuery.should(termQuery("credit_code", org_content.toUpperCase()).boost(1000));
|
|
|
+ boolQuery.should(termQuery("reg_number", org_content.toUpperCase()).boost(1000));
|
|
|
+ boolQuery.should(termQuery("icp_domain.keyword", org_content).boost(1000));
|
|
|
+ boolQuery.should(termQuery("emails.keyword", org_content).boost(1000));
|
|
|
+ boolQuery.should(termQuery("phones", org_content).boost(1000));
|
|
|
+ boolQuery.should(termQuery("reg_location.keyword", org_content).boost(1000));
|
|
|
}
|
|
|
|
|
|
boolQuery.should(disMaxQuery()
|
|
|
- .add(disMaxQuery()
|
|
|
- .add(termQuery("legal_entity_name.keyword", content).boost(10))
|
|
|
- .add(termQuery("holder.name.keyword", content).boost(10F))
|
|
|
- .add(termQuery("staff.name.keyword", content).boost(5.5F))
|
|
|
- .tieBreaker(0.3F)
|
|
|
- ).add(disMaxQuery()
|
|
|
-
|
|
|
- .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<95%"))
|
|
|
-// .add(matchQuery("holder", content).boost(10).minimumShouldMatch("5<80%"))
|
|
|
-// .add(matchQuery("staff", content).boost(6).minimumShouldMatch("5<80%"))
|
|
|
+ .add(disMaxQuery()
|
|
|
+ .add(termQuery("legal_entity_name.keyword", org_content).boost(10))
|
|
|
+ .add(termQuery("holder.name.keyword", content).boost(10F))
|
|
|
+ .add(termQuery("holder_history.name.keyword", content).boost(10F))
|
|
|
+ .add(termQuery("staff.name.keyword", content).boost(5.5F))
|
|
|
+ .add(termQuery("staff_history.name.keyword", content).boost(5.5F))
|
|
|
+ .tieBreaker(0.3F)
|
|
|
+ ).add(disMaxQuery()
|
|
|
|
|
|
-// .add(matchPhraseQuery("legal_entity_name", content).boost(6).slop(3))
|
|
|
- .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
|
|
|
- .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
|
|
|
+ .add(matchQuery("legal_entity_name", org_content).boost(6).minimumShouldMatch("5<95%"))
|
|
|
+ .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
|
|
|
+ .add(matchPhraseQuery("holder_history.name", content).boost(10).slop(3))
|
|
|
+ .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
|
|
|
+ .add(matchPhraseQuery("staff_history.name", content).boost(6).slop(3))
|
|
|
|
|
|
- .tieBreaker(0.3F)
|
|
|
- ).tieBreaker(0.3F)
|
|
|
+ .tieBreaker(0.3F)
|
|
|
+ ).tieBreaker(0.3F)
|
|
|
);
|
|
|
boolQuery.should(disMaxQuery()
|
|
|
- .add(disMaxQuery()
|
|
|
- .add(termQuery("icp.keyword", content).boost(20))
|
|
|
- .add(termQuery("app_info.keyword", content).boost(40))
|
|
|
- .add(termQuery("company_tm.keyword", content).boost(20))
|
|
|
- .tieBreaker(0.4F))
|
|
|
- .add(disMaxQuery()
|
|
|
-// .add(matchQuery("icp", content).boost(8).minimumShouldMatch("5"))
|
|
|
-// .add(matchQuery("app_info", content).boost(19).minimumShouldMatch("5"))
|
|
|
-// .add(matchQuery("company_tm", content).boost(7).minimumShouldMatch("5"))
|
|
|
-
|
|
|
-
|
|
|
- .add(matchPhraseQuery("icp", content).boost(8).slop(3))
|
|
|
- .add(matchPhraseQuery("app_info", content).boost(19).slop(3))
|
|
|
- .add(matchPhraseQuery("company_tm", content).boost(7).slop(3))
|
|
|
-
|
|
|
- .tieBreaker(0.3F)
|
|
|
- ).tieBreaker(0.4F)
|
|
|
+ .add(disMaxQuery()
|
|
|
+ .add(termQuery("icp.keyword", org_content).boost(20))
|
|
|
+ .add(termQuery("app_info.keyword", org_content).boost(40))
|
|
|
+ .add(termQuery("company_tm.keyword", org_content).boost(20))
|
|
|
+ .tieBreaker(0.4F))
|
|
|
+ .add(disMaxQuery()
|
|
|
+ .add(matchPhraseQuery("icp", org_content).boost(8).slop(3))
|
|
|
+ .add(matchPhraseQuery("app_info", org_content).boost(19).slop(3))
|
|
|
+ .add(matchPhraseQuery("company_tm", org_content).boost(7).slop(3))
|
|
|
+ .tieBreaker(0.3F)
|
|
|
+ ).tieBreaker(0.4F)
|
|
|
+ );
|
|
|
+
|
|
|
+
|
|
|
+ boolQuery.should(disMaxQuery()
|
|
|
+ .add(disMaxQuery()
|
|
|
+ .add(matchQuery("reg_location", content).boost(7).minimumShouldMatch("100%"))
|
|
|
+ .add(matchQuery("emails", org_content).boost(7).minimumShouldMatch("100%"))
|
|
|
+ .add(matchQuery("icp_domain", org_content).boost(1000).minimumShouldMatch("100%"))
|
|
|
+ .tieBreaker(0.3F)
|
|
|
+ ).tieBreaker(0.4F)
|
|
|
);
|
|
|
|
|
|
|