package com.winhc.phoenix.example.service.impl;

import com.winhc.phoenix.example.dao.SearchDao;
import com.winhc.phoenix.example.enums.CompanyQueryType;
import com.winhc.phoenix.example.enums.CompanySearchSortType;
import com.winhc.phoenix.example.service.SearchService;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.functionscore.ScriptScoreFunctionBuilder;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.rescore.QueryRescoreMode;
import org.elasticsearch.search.rescore.QueryRescorerBuilder;
import org.elasticsearch.search.rescore.RescoreBuilder;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.context.annotation.Primary;
import org.springframework.stereotype.Service;

import java.util.*;
import java.util.regex.Pattern;

import static org.elasticsearch.index.query.QueryBuilders.*;

/**
 * {@link SearchService} implementation registered as the primary bean under the name
 * {@code "v8_fast"}. It builds Elasticsearch queries against the {@link #index} index
 * and delegates their execution to {@link SearchDao}.
 *
 * @author: XuJiakai
 * 2020/11/19 14:54
 */
@Slf4j
@Primary
@Service(value = "v8_fast")
@AllArgsConstructor
public class SearchV8FastServiceImpl implements SearchService {

    private SearchDao searchDao;

    /** Name of the index every query in this class is sent to. */
    public static final String index = "winhc-company-v8_4";
    /** Mapping type passed to the DAO together with {@link #index}. */
    public static final String type = "company";

    /** {@code _source} include patterns used by {@link #controlGroup(String)}. */
    private static final String[] includes = new String[]{"cname", "legal_entity*", "estiblish_time", "reg_status_std", "company_type", "province_code", "reg_capital", "logo", "new_cid"};
    private static final FetchSourceContext fetchSourceContext = new FetchSourceContext(true, includes, null);

    // NOTE(review): the two "tips" constants below are not referenced anywhere in this class;
    // tips() passes null in the position where controlGroup() passes fetchSourceContext.
    // Confirm whether tips() was meant to use fetchSourceContext_tips.
    private static final String[] includes_tips = new String[]{"cname.show"};
    private static final FetchSourceContext fetchSourceContext_tips = new FetchSourceContext(true, includes_tips, null);

    /**
     * Runs a phrase-prefix query on {@code cname.value}, sorted by
     * {@code company_score_weight} descending, with from=0 and size=5.
     *
     * @param content the text typed so far
     * @return whatever {@link SearchDao} returns for the query
     */
    @Override
    public Object tips(String content) {
        BoolQueryBuilder queryBuilder = boolQuery()
                .should(matchPhrasePrefixQuery("cname.value", content));
        FieldSortBuilder company_score_weight = SortBuilders.fieldSort("company_score_weight").order(SortOrder.DESC);
        return searchDao.search(index, type, queryBuilder, company_score_weight, null, 0, 5);
    }

    /**
     * Runs the plain recall query (no type restriction, no rescoring, no explicit sort)
     * with from=0 and size=10, restricted to the {@link #includes} source fields.
     *
     * @param s the search text
     * @return whatever {@link SearchDao} returns for the query
     */
    @Override
    public Object controlGroup(String s) {
        QueryBuilder boolQuery = getBoolQuery(s, new HashSet<>());
        return searchDao.search(index, type, boolQuery, null, fetchSourceContext, 0, 10);
    }

    /**
     * Builds the three rescorers applied when no explicit sort is requested: two stored-script
     * function-score rescorers (window 100) followed by a query rescorer built from
     * {@link #getRescorerBool(String)} (window 50).
     *
     * @param content the search text, handed to the stored scripts as {@code query_content}
     * @return the rescorers in the order they are listed here
     */
    private List getReScoreBuilder(String content) {
        // A plain map: no anonymous HashMap subclass that would capture this service instance.
        // Both stored scripts receive the same parameter map.
        Map<String, Object> scriptParams = new HashMap<>(4);
        scriptParams.put("query_content", content);
        scriptParams.put("der", 0.85);

        QueryRescorerBuilder termScoreRescorer = new QueryRescorerBuilder(
                functionScoreQuery(new ScriptScoreFunctionBuilder(
                        new Script(ScriptType.STORED, null, "company-name-term-score", scriptParams))))
                .windowSize(100)
                .setScoreMode(QueryRescoreMode.Total);

        QueryRescorerBuilder searchScriptRescorer = new QueryRescorerBuilder(
                functionScoreQuery(new ScriptScoreFunctionBuilder(
                        new Script(ScriptType.STORED, null, "company-search-script_v2", scriptParams))))
                .windowSize(100)
                .setScoreMode(QueryRescoreMode.Multiply);

        QueryRescorerBuilder brandRescorer = new QueryRescorerBuilder(getRescorerBool(content))
                .windowSize(50)
                .setScoreMode(QueryRescoreMode.Total);

        return Arrays.asList(termScoreRescorer, searchScriptRescorer, brandRescorer);
    }

    /**
     * Executes the recall query for {@code content}. When {@code sortType} is one of the four
     * explicit orderings handled by {@link #resolveSort(CompanySearchSortType)}, the hits are
     * sorted by that field and no rescoring is applied; otherwise (including {@code null})
     * the hits are rescored with {@link #getReScoreBuilder(String)}.
     *
     * @param content  the search text
     * @param from     offset forwarded to the DAO
     * @param size     page size forwarded to the DAO
     * @param set      {@link CompanyQueryType} values restricting which fields must match;
     *                 {@code null} or empty means no restriction
     * @param sortType requested ordering
     * @return whatever {@link SearchDao} returns for the query
     */
    @Override
    public Object query(String content, int from, int size, Set set, CompanySearchSortType sortType) {
        // Recall phase.
        BoolQueryBuilder recallQuery = getBoolQuery(content, set);

        FieldSortBuilder explicitSort = resolveSort(sortType);
        if (explicitSort != null) {
            return searchDao.search(index, type, recallQuery, null, explicitSort, null, from, size);
        }

        List reScoreBuilder = getReScoreBuilder(content);
        return searchDao.search(index, type, recallQuery, reScoreBuilder, null, null, from, size);
    }

    /**
     * Maps a sort type to the field sort it stands for.
     *
     * @param sortType requested ordering, may be {@code null}
     * @return the field sort, or {@code null} when {@code sortType} is not one of the four
     *         explicit orderings
     */
    private static FieldSortBuilder resolveSort(CompanySearchSortType sortType) {
        if (sortType == CompanySearchSortType.注册资本_从低到高) {
            return SortBuilders.fieldSort("reg_capital_amount").order(SortOrder.ASC);
        }
        if (sortType == CompanySearchSortType.注册资本_从高到底) {
            return SortBuilders.fieldSort("reg_capital_amount").order(SortOrder.DESC);
        }
        if (sortType == CompanySearchSortType.成立日期_从早到晚) {
            return SortBuilders.fieldSort("estiblish_time").order(SortOrder.ASC);
        }
        if (sortType == CompanySearchSortType.成立日期_从晚到早) {
            return SortBuilders.fieldSort("estiblish_time").order(SortOrder.DESC);
        }
        return null;
    }

    /**
     * Builds the query used by the third rescorer: a dis-max over match queries on
     * {@code icp}, {@code app_info} and {@code company_tm}.
     */
    private BoolQueryBuilder getRescorerBool(String content) {
        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
        boolQuery.should(disMaxQuery()
                .add(matchQuery("icp", content).boost(8))
                .add(matchQuery("app_info", content).boost(19))
                .add(matchQuery("company_tm", content).boost(20))
                .tieBreaker(0.5F)
        );
        return boolQuery;
    }

    /**
     * Builds the recall query: a bool query whose {@code should} clauses cover the history
     * name (exact, only for content longer than 3 chars), people fields, icp/app/trademark
     * fields and the company name fields. It is wrapped in an outer bool query that filters
     * on {@code deleted = "0"} and {@code company_score_weight > 0.3}, and optionally on the
     * requested query types.
     *
     * @param content the search text; must not be {@code null}
     * @param set     {@link CompanyQueryType} values to restrict to; {@code null} or empty
     *                adds no type filter
     * @return the outer bool query
     */
    private BoolQueryBuilder getBoolQuery(String content, Set set) {
        BoolQueryBuilder recall = QueryBuilders.boolQuery();

        if (content.length() > 3) {
            recall.should(termQuery("history_name.value.keyword", content).boost(1000));
        }

        // Legal representative / holder / staff: exact keyword hits and looser text hits.
        recall.should(disMaxQuery()
                .add(disMaxQuery()
                        .add(termQuery("legal_entity_name.keyword", content).boost(10))
                        .add(termQuery("holder.name.keyword", content).boost(10F))
                        .add(termQuery("staff.name.keyword", content).boost(5.5F))
                        .tieBreaker(0.3F)
                ).add(disMaxQuery()
                        .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<95%"))
                        .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
                        .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
                        .tieBreaker(0.3F)
                ).tieBreaker(0.3F)
        );

        // icp / app_info / company_tm: exact keyword hits and phrase hits.
        recall.should(disMaxQuery()
                .add(disMaxQuery()
                        .add(termQuery("icp.keyword", content).boost(20))
                        .add(termQuery("app_info.keyword", content).boost(40))
                        .add(termQuery("company_tm.keyword", content).boost(20))
                        .tieBreaker(0.4F))
                .add(disMaxQuery()
                        .add(matchPhraseQuery("icp", content).boost(8).slop(3))
                        .add(matchPhraseQuery("app_info", content).boost(19).slop(3))
                        .add(matchPhraseQuery("company_tm", content).boost(7).slop(3))
                        .tieBreaker(0.3F)
                ).tieBreaker(0.4F)
        );

        // Current and historical company name, on the default and the ".standard" sub-fields.
        recall.should(
                disMaxQuery()
                        .add(multiMatchQuery(content)
                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
                                .minimumShouldMatch("5<90%")
                                .tieBreaker(0.3F)
                                .field("cname.value", 16)
                                .field("history_name.value", 12))
                        .add(multiMatchQuery(content)
                                .operator(Operator.AND)
                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
                                .tieBreaker(0.3F)
                                .field("cname.value.standard", 16)
                                .field("history_name.value.standard", 12))
                        .tieBreaker(0.4F)
        );

        BoolQueryBuilder outer = QueryBuilders.boolQuery()
                .filter(termQuery("deleted", "0"))
                .filter(rangeQuery("company_score_weight").gt(0.3F))
                .must(recall);

        // A null set is treated like an empty one instead of failing with a NullPointerException.
        if (set != null && !set.isEmpty()) {
            outer.filter(buildTypeFilter(set, content));
        }
        return outer;
    }

    /**
     * Builds the filter that restricts hits to the requested query types: one span-near
     * {@code should} clause per field belonging to each type present in {@code set}.
     *
     * @param set     non-empty set of requested types
     * @param content the search text; must not be empty
     */
    private static BoolQueryBuilder buildTypeFilter(Set set, String content) {
        BoolQueryBuilder typeFilter = boolQuery();
        if (set.contains(CompanyQueryType.NAME)) {
            typeFilter.should(getSpanNearQuery("cname.value.standard", content));
            typeFilter.should(getSpanNearQuery("history_name.value.standard", content));
        }
        if (set.contains(CompanyQueryType.APP)) {
            typeFilter.should(getSpanNearQuery("app_info.standard", content));
        }
        if (set.contains(CompanyQueryType.HOLDER_OR_STAFF)) {
            typeFilter.should(getSpanNearQuery("holder.name.standard", content));
            typeFilter.should(getSpanNearQuery("staff.name.standard", content));
        }
        if (set.contains(CompanyQueryType.LEGAL_REPRESENTATIVE)) {
            typeFilter.should(getSpanNearQuery("legal_entity_name.standard", content));
        }
        return typeFilter;
    }

    /**
     * Builds a span-near query (slop 1) with one span-term clause per character of
     * {@code content}, in order.
     *
     * <p>The text is split by Unicode code point rather than by {@code char}, so a
     * supplementary-plane character (for example a rare CJK ideograph) yields one term
     * instead of two lone surrogates that could never match an indexed token.
     *
     * @param fields  name of the field to search
     * @param content the search text; must not be empty
     * @return the span-near query
     * @throws IndexOutOfBoundsException if {@code content} is empty
     */
    private static QueryBuilder getSpanNearQuery(String fields, String content) {
        int firstCodePoint = content.codePointAt(0);
        SpanNearQueryBuilder spanNear = spanNearQuery(
                spanTermQuery(fields, new String(Character.toChars(firstCodePoint))), 1);

        int offset = Character.charCount(firstCodePoint);
        while (offset < content.length()) {
            int codePoint = content.codePointAt(offset);
            spanNear.addClause(spanTermQuery(fields, new String(Character.toChars(codePoint))));
            offset += Character.charCount(codePoint);
        }
        return spanNear;
    }

    /**
     * Variant of the recall query with different boosts: exact company-name hits are included,
     * the people group is boosted by 2 and the other groups by 0.5.
     *
     * <p>NOTE(review): not called from anywhere in this class.
     */
    private BoolQueryBuilder getPersonQuery(String content) {
        BoolQueryBuilder recall = QueryBuilders.boolQuery();
        recall.should(termQuery("cname.value.keyword", content).boost(1000));
        recall.should(termQuery("history_name.value.keyword", content).boost(1000));

        recall.should(disMaxQuery()
                .add(disMaxQuery()
                        .add(termQuery("legal_entity_name.keyword", content).boost(10))
                        .add(termQuery("holder.name.keyword", content).boost(5.5F))
                        .add(termQuery("staff.name.keyword", content).boost(5.5F))
                        .tieBreaker(0.3F)
                ).add(disMaxQuery()
                        .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<95%"))
                        .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
                        .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
                        .tieBreaker(0.3F)
                )
                .boost(2f)
                .tieBreaker(0.3F)
        );

        recall.should(disMaxQuery()
                .add(disMaxQuery()
                        .add(termQuery("icp.keyword", content).boost(20))
                        .add(termQuery("app_info.keyword", content).boost(40))
                        .add(termQuery("company_tm.keyword", content).boost(20))
                        .tieBreaker(0.4F))
                .add(disMaxQuery()
                        .add(matchPhraseQuery("icp", content).boost(8).slop(3))
                        .add(matchPhraseQuery("app_info", content).boost(19).slop(3))
                        .add(matchPhraseQuery("company_tm", content).boost(7).slop(3))
                        .tieBreaker(0.3F)
                )
                .boost(0.5f)
                .tieBreaker(0.1F)
        );

        recall.should(
                disMaxQuery()
                        .add(multiMatchQuery(content)
                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
                                .minimumShouldMatch("5<90%")
                                .tieBreaker(0.3F)
                                .field("cname.value", 16)
                                .field("history_name.value", 12))
                        .add(multiMatchQuery(content)
                                .operator(Operator.AND)
                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
                                .tieBreaker(0.3F)
                                .field("cname.value.standard", 16)
                                .field("history_name.value.standard", 12))
                        .boost(0.5f)
                        .tieBreaker(0.1F)
        );

        return QueryBuilders.boolQuery()
                .filter(termQuery("deleted", "0"))
                .filter(rangeQuery("company_score_weight").gt(0.3F))
                .must(recall);
    }

    /** Matches text made up only of ASCII letters and spaces (the empty string included). */
    private static final Pattern pattern = Pattern.compile("^[a-zA-Z ]*$");

    /**
     * Tells whether {@code str} consists only of ASCII letters and spaces.
     *
     * <p>NOTE(review): not called from anywhere in this class.
     */
    private static boolean is_pinyin(String str) {
        return pattern.matcher(str).find();
    }

    /**
     * Scratch method: builds two queries and discards them.
     *
     * <p>NOTE(review): has no effect and is not called from anywhere in this class.
     */
    private void se() {
        String content = "";
        boolQuery().must(multiMatchQuery(content).field("name").minimumShouldMatch("100%"));

        Script script = new Script(ScriptType.INLINE, "painless", "return doc['company_score_weight'].value;", new HashMap<>());
        boolQuery().filter(scriptQuery(script));
    }
}