|
@@ -1,5 +1,7 @@
|
|
|
package com.winhc.phoenix.example.util.company.search;
|
|
|
|
|
|
+import com.winhc.tool.nlp.CompanyNameAnalyzer;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
import org.elasticsearch.common.lucene.search.function.CombineFunction;
|
|
|
import org.elasticsearch.index.query.*;
|
|
@@ -10,6 +12,7 @@ import org.elasticsearch.script.ScriptType;
|
|
|
import org.elasticsearch.search.rescore.QueryRescoreMode;
|
|
|
import org.elasticsearch.search.rescore.QueryRescorerBuilder;
|
|
|
|
|
|
+import java.io.IOException;
|
|
|
import java.util.*;
|
|
|
|
|
|
import static org.elasticsearch.index.query.QueryBuilders.*;
|
|
@@ -18,16 +21,31 @@ import static org.elasticsearch.index.query.QueryBuilders.*;
|
|
|
* @author: XuJiakai
|
|
|
* 2021/11/17 17:17
|
|
|
*/
|
|
|
+@Slf4j
|
|
|
public class CompanySearchQueryUtils {
|
|
|
+ private static CompanyNameAnalyzer companyNameAnalyzer;
|
|
|
+
|
|
|
+    static {
+        // Build the shared analyzer once, when the class is first initialized.
+        try {
+            companyNameAnalyzer = new CompanyNameAnalyzer();
+        } catch (IOException exception) {
+            // Rethrow with a message and the original cause attached instead of also printing
+            // to stderr: the cause chain travels with the exception, so the trace is not lost
+            // and is not reported twice.
+            throw new IllegalStateException("Failed to initialize CompanyNameAnalyzer", exception);
+        }
+    }
|
|
|
+
|
|
|
+
|
|
|
public static QueryBuilder getQueryBuilder(CompanyQueryVo companyQueryVo) {
|
|
|
|
|
|
String content = companyQueryVo.getContent();
|
|
|
- BoolQueryBuilder boolQuery = getBoolQuery(companyQueryVo);
|
|
|
+// BoolQueryBuilder boolQuery = getPersonBoolQuery(companyQueryVo);
|
|
|
+// BoolQueryBuilder boolQuery = getBoolQuery(companyQueryVo);
|
|
|
+ BoolQueryBuilder boolQuery = CompanySearchClassifyUtils.isPerson(content) ? getPersonBoolQuery(companyQueryVo) : getBoolQuery(companyQueryVo);
|
|
|
|
|
|
//以下为过滤逻辑
|
|
|
BoolQueryBuilder returnBoolQuery = boolQuery()
|
|
|
.filter(termQuery("deleted", "0"))
|
|
|
- .filter(boolQuery().should(rangeQuery("company_score_weight").gt(0.3F))
|
|
|
+ .filter(boolQuery().should(rangeQuery("company_score_weight").gt(0.2F))
|
|
|
.should(termsQuery("company_type", "2"))
|
|
|
)
|
|
|
.must(boolQuery);
|
|
@@ -82,7 +100,7 @@ public class CompanySearchQueryUtils {
|
|
|
|
|
|
Map<String, Object> map = new HashMap<String, Object>(2) {{
|
|
|
put("query_content", content);
|
|
|
- put("der", 0.85);
|
|
|
+ put("der", 0.9);//公司自有评分占比
|
|
|
}};
|
|
|
List<QueryRescorerBuilder> list = new ArrayList<>();
|
|
|
|
|
@@ -91,8 +109,17 @@ public class CompanySearchQueryUtils {
|
|
|
.windowSize(100)
|
|
|
.setScoreMode(QueryRescoreMode.Total));
|
|
|
|
|
|
+
|
|
|
//乘上权重分
|
|
|
- list.add(new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-search-script_v2", map))))
|
|
|
+ /* list.add(new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(
|
|
|
+ ElasticSearchScriptTest.getTestScript(map)
|
|
|
+ )))
|
|
|
+ .windowSize(100)
|
|
|
+ .setScoreMode(QueryRescoreMode.Multiply));*/
|
|
|
+
|
|
|
+
|
|
|
+ //乘上权重分
|
|
|
+ list.add(new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-search-script_v3", map))))
|
|
|
.windowSize(100)
|
|
|
.setScoreMode(QueryRescoreMode.Multiply));
|
|
|
|
|
@@ -105,11 +132,137 @@ public class CompanySearchQueryUtils {
|
|
|
}
|
|
|
|
|
|
|
|
|
+    /**
+     * Builds the relevance query used when the search keyword is treated as a person name.
+     *
+     * <p>The result contains only {@code should} clauses, grouped as: exact company-name
+     * matches (long keywords only), people attached to the company (legal entities, holders,
+     * staff and their history), auxiliary assets (ICP, app, trademark, registered location),
+     * and fuzzy company-name matching, optionally extended with a simplified-Chinese variant
+     * of the keyword.
+     *
+     * @param companyQueryVo search request; only {@code getContent()} is read here
+     * @return a bool query made up of the {@code should} clauses described above
+     */
+    private static BoolQueryBuilder getPersonBoolQuery(CompanyQueryVo companyQueryVo) {
+        log.debug("search keyword is person name: {} ", companyQueryVo.getContent());
+
+        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
+        // Raw keyword as typed by the user, and the same keyword after CompanyQueryVo.cleanup.
+        String originalContent = companyQueryVo.getContent();
+        String content = CompanyQueryVo.cleanup(companyQueryVo.getContent());
+
+        // Exact company-name lookups are only attempted for keywords longer than 3 characters.
+        if (content.length() > 3) {
+            boolQuery.should(disMaxQuery()
+                    .add(termQuery("history_name.show.keyword", originalContent))
+                    .add(termQuery("history_name.value", content))
+            );
+
+            // boost(0): an exact current-name hit is kept as a clause with a zero boost.
+            boolQuery.should(termQuery("cname.value", content).boost(0));
+        }
+
+        // People attached to the company: exact keyword matches first, then phrase matches.
+        // No tieBreaker is set on these three dis_max groups, so the builder default applies.
+        boolQuery.should(disMaxQuery()
+                .add(disMaxQuery()
+                        .add(termQuery("legal_entities.name.keyword", originalContent).boost(10))
+                        .add(termQuery("holder.name.keyword", content).boost(10F))
+                        .add(termQuery("holder.name.keyword", originalContent).boost(10F))
+                        .add(termQuery("holder_history.name.keyword", content).boost(10F))
+                        .add(termQuery("staff.name.keyword", content).boost(8F))
+                        .add(termQuery("staff_history.name.keyword", content).boost(5.5F))
+                )
+                .add(disMaxQuery()
+                        .add(matchQuery("legal_entities.name", originalContent).boost(10).minimumShouldMatch("5<95%"))
+                        .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
+                        .add(matchPhraseQuery("holder_history.name", content).boost(10).slop(3))
+                        .add(matchPhraseQuery("staff.name", content).boost(8).slop(3))
+                        .add(matchPhraseQuery("staff_history.name", content).boost(6).slop(3))
+                )
+        );
+
+        // Auxiliary assets: ICP, app info, trademark and registered location.
+        boolQuery.should(disMaxQuery()
+                .add(disMaxQuery()
+                        .add(termQuery("icp.keyword", originalContent).boost(1))
+                        .add(termQuery("app_info.keyword", originalContent).boost(2))
+                        .add(termQuery("company_tm.keyword", originalContent).boost(1))
+                        .tieBreaker(0.4F))
+                .add(disMaxQuery()
+                        .add(matchPhraseQuery("icp", content).boost(1).slop(3))
+                        .add(matchPhraseQuery("app_info", content).boost(2).slop(3))
+                        .add(matchPhraseQuery("company_tm", originalContent).boost(1).slop(1))
+                        .add(matchQuery("reg_location", content).boost(1).minimumShouldMatch("100%"))
+                        .tieBreaker(0.3F))
+                .tieBreaker(0.4F)
+        );
+
+        // Company-name matching: pinyin phrases, span-near on the standard analyzers, and
+        // cross-field matches. All field weights are 1 in the person-name variant.
+        DisMaxQueryBuilder nameQuery = disMaxQuery().add(disMaxQuery()
+                .add(disMaxQuery()
+                        .add(matchPhraseQuery("cname.show.pinyin", content))
+                        .add(matchPhraseQuery("history_name.show.pinyin", content))
+                )
+                .add(disMaxQuery()
+                        .add(getSpanNearQuery("name_alias.standard", content).boost(1))
+                        .add(getSpanNearQuery("cname.show.standard", content).boost(1))
+                        .add(getSpanNearQuery("history_name.show.standard", content).boost(1))
+                )
+                .add(multiMatchQuery(content)
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .minimumShouldMatch("100%")
+                        .tieBreaker(0.3F)
+                        .field("cname.show", 1)
+                        .field("name_alias", 1)
+                        .field("history_name.show", 1))
+                .add(multiMatchQuery(content)
+                        .operator(Operator.AND)
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .tieBreaker(0.3F)
+                        .field("cname.show.standard", 1)
+                        .field("history_name.show.standard", 1))
+                .tieBreaker(0.4F));
+
+        String simplifiedChinese = CompanyIndexUtils.convertToSimplifiedChinese(originalContent);
+        // Only add the simplified-Chinese variant when the conversion produced a usable value
+        // that differs from the keyword. The blank check matters: the query builders below
+        // reject a null query value, and a blank value would only add empty clauses.
+        // NOTE(review): assumes convertToSimplifiedChinese may return null/blank - confirm.
+        boolean hasDistinctSimplifiedForm = StringUtils.isNotBlank(simplifiedChinese)
+                && !StringUtils.equals(originalContent, simplifiedChinese);
+        if (hasDistinctSimplifiedForm) {
+            // Traditional-to-simplified query variant.
+            // NOTE(review): the history-name pinyin clause targets "history_name.show.pinyin"
+            // while the other clauses use the ".simplified_chinese" sub-fields - confirm intended.
+            nameQuery.add(disMaxQuery()
+                    .add(disMaxQuery()
+                            .add(matchPhraseQuery("cname.simplified_chinese.pinyin", simplifiedChinese))
+                            .add(matchPhraseQuery("history_name.show.pinyin", simplifiedChinese))
+                    )
+                    .add(multiMatchQuery(simplifiedChinese)
+                            .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                            .minimumShouldMatch("5<90%")
+                            .tieBreaker(0.3F)
+                            .field("cname.simplified_chinese", 1)
+                            .field("history_name.simplified_chinese", 1))
+                    .add(multiMatchQuery(simplifiedChinese)
+                            .operator(Operator.AND)
+                            .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                            .tieBreaker(0.3F)
+                            .field("cname.simplified_chinese.standard", 1)
+                            .field("history_name.simplified_chinese.standard", 1))
+                    .tieBreaker(0.4F));
+        }
+
+        boolQuery.should(nameQuery);
+        return boolQuery;
+    }
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 通用搜索
|
|
|
+ *
|
|
|
+ * @param companyQueryVo
|
|
|
+ * @return
|
|
|
+ */
|
|
|
private static BoolQueryBuilder getBoolQuery(CompanyQueryVo companyQueryVo) {
|
|
|
+ log.debug("search keyword is not person name: {} ", companyQueryVo.getContent());
|
|
|
+
|
|
|
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
|
|
|
String org_content = companyQueryVo.getContent();
|
|
|
String content = CompanyQueryVo.cleanup(companyQueryVo.getContent());
|
|
|
|
|
|
+ String companyNameAlias = companyNameAnalyzer.nameAliasExtract(companyQueryVo.getContent());
|
|
|
+
|
|
|
if (content.length() > 3) {
|
|
|
boolQuery.should(disMaxQuery()
|
|
|
.add(termQuery("history_name.show.keyword", org_content))
|
|
@@ -127,25 +280,28 @@ public class CompanySearchQueryUtils {
|
|
|
.add(termQuery("phones.keyword", org_content).boost(1000))
|
|
|
.add(matchQuery("phones", org_content).boost(1000))
|
|
|
);
|
|
|
- boolQuery.should(termQuery("reg_location.keyword", org_content).boost(1000));
|
|
|
+ boolQuery.should(termQuery("reg_location.keyword", org_content).boost(100));
|
|
|
}
|
|
|
|
|
|
boolQuery.should(disMaxQuery()
|
|
|
- .add(disMaxQuery()
|
|
|
- .add(termQuery("legal_entity_name.keyword", org_content).boost(10))
|
|
|
- .add(termQuery("holder.name.keyword", content).boost(10F))
|
|
|
- .add(termQuery("holder_history.name.keyword", content).boost(10F))
|
|
|
- .add(termQuery("staff.name.keyword", content).boost(5.5F))
|
|
|
- .add(termQuery("staff_history.name.keyword", content).boost(5.5F))
|
|
|
- .tieBreaker(0.3F))
|
|
|
- .add(disMaxQuery()
|
|
|
- .add(matchQuery("legal_entity_name", org_content).boost(6).minimumShouldMatch("5<95%"))
|
|
|
- .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
|
|
|
- .add(matchPhraseQuery("holder_history.name", content).boost(10).slop(3))
|
|
|
- .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
|
|
|
- .add(matchPhraseQuery("staff_history.name", content).boost(6).slop(3))
|
|
|
- .tieBreaker(0.3F))
|
|
|
- .tieBreaker(0.3F)
|
|
|
+ .add(disMaxQuery()
|
|
|
+// .add(termQuery("legal_entity_name.keyword", org_content).boost(10))
|
|
|
+ .add(termQuery("legal_entities.name.keyword", org_content).boost(10))
|
|
|
+ .add(termQuery("holder.name.keyword", content).boost(10F))
|
|
|
+ .add(termQuery("holder.name.keyword", org_content).boost(10F))
|
|
|
+ .add(termQuery("holder_history.name.keyword", content).boost(10F))
|
|
|
+ .add(termQuery("staff.name.keyword", content).boost(5.5F))
|
|
|
+ .add(termQuery("staff_history.name.keyword", content).boost(5.5F))
|
|
|
+ .tieBreaker(0.3F))
|
|
|
+ .add(disMaxQuery()
|
|
|
+// .add(matchQuery("legal_entity_name", org_content).boost(6).minimumShouldMatch("5<95%"))
|
|
|
+ .add(matchQuery("legal_entities.name", org_content).boost(6).minimumShouldMatch("5<95%"))
|
|
|
+ .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
|
|
|
+ .add(matchPhraseQuery("holder_history.name", content).boost(10).slop(3))
|
|
|
+ .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
|
|
|
+ .add(matchPhraseQuery("staff_history.name", content).boost(6).slop(3))
|
|
|
+ .tieBreaker(0.3F))
|
|
|
+ .tieBreaker(0.3F)
|
|
|
);
|
|
|
boolQuery.should(disMaxQuery()
|
|
|
.add(disMaxQuery()
|
|
@@ -166,23 +322,52 @@ public class CompanySearchQueryUtils {
|
|
|
boolQuery.should(disMaxQuery()
|
|
|
.add(disMaxQuery()
|
|
|
.add(matchQuery("emails", org_content).boost(7).minimumShouldMatch("100%"))
|
|
|
- .add(matchQuery("icp_domain", org_content).boost(1000).minimumShouldMatch("100%"))
|
|
|
+ .add(matchQuery("icp_domain", org_content).boost(1000).minimumShouldMatch("50%"))
|
|
|
|
|
|
.tieBreaker(0.3F))
|
|
|
.tieBreaker(0.4F)
|
|
|
);
|
|
|
|
|
|
+ /* Map<String, Object> map = new HashMap<String, Object>(1) {
|
|
|
+ {
|
|
|
+ put("name", content);
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ Script painless = new Script(ScriptType.INLINE, "painless", "String name_alias = doc['name_alias.keyword'].value;if(name_alias==null)return false;return params.name.contains(name_alias);", map);
|
|
|
+
|
|
|
+ boolQuery.should(boolQuery()
|
|
|
+ .must(matchQuery("name_alias", content).operator(Operator.OR))
|
|
|
+ .must(scriptQuery(painless)));*/
|
|
|
+
|
|
|
+
|
|
|
DisMaxQueryBuilder add = disMaxQuery().add(disMaxQuery()
|
|
|
.add(disMaxQuery()
|
|
|
.add(matchPhraseQuery("cname.show.pinyin", content))
|
|
|
.add(matchPhraseQuery("history_name.show.pinyin", content))
|
|
|
)
|
|
|
+ .add(disMaxQuery()
|
|
|
+ .add(getSpanNearQuery("name_alias.standard", content).boost(20))
|
|
|
+ .add(getSpanNearQuery("cname.show.standard", content).boost(10))
|
|
|
+ .add(getSpanNearQuery("history_name.show.standard", content).boost(5))
|
|
|
+ )
|
|
|
+
|
|
|
.add(multiMatchQuery(content)
|
|
|
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
|
|
|
.minimumShouldMatch("5<90%")
|
|
|
.tieBreaker(0.3F)
|
|
|
|
|
|
.field("cname.show", 16)
|
|
|
+ .field("name_alias", 20)
|
|
|
+ .field("history_name.show", 12))
|
|
|
+
|
|
|
+ .add(multiMatchQuery(companyNameAlias)
|
|
|
+ .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
|
|
|
+ .minimumShouldMatch("5<90%")
|
|
|
+ .tieBreaker(0.3F)
|
|
|
+
|
|
|
+ .field("cname.show", 16)
|
|
|
+ .field("name_alias", 20)
|
|
|
.field("history_name.show", 12))
|
|
|
.add(multiMatchQuery(content)
|
|
|
.operator(Operator.AND)
|
|
@@ -194,7 +379,7 @@ public class CompanySearchQueryUtils {
|
|
|
.tieBreaker(0.4F));
|
|
|
|
|
|
String simplifiedChinese = CompanyIndexUtils.convertToSimplifiedChinese(org_content);
|
|
|
- if (StringUtils.isNotBlank(simplifiedChinese)) {
|
|
|
+ if (!StringUtils.equals(org_content, simplifiedChinese)) {
|
|
|
//添加繁体字简化查询
|
|
|
add.add(disMaxQuery()
|
|
|
.add(disMaxQuery()
|