123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332 |
- package com.winhc.phoenix.example.service.impl;
- import com.winhc.phoenix.example.dao.SearchDao;
- import com.winhc.phoenix.example.enums.CompanyQueryType;
- import com.winhc.phoenix.example.enums.CompanySearchSortType;
- import com.winhc.phoenix.example.service.SearchService;
- import lombok.AllArgsConstructor;
- import lombok.extern.slf4j.Slf4j;
- import org.elasticsearch.index.query.*;
- import org.elasticsearch.index.query.functionscore.ScriptScoreFunctionBuilder;
- import org.elasticsearch.script.Script;
- import org.elasticsearch.script.ScriptType;
- import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
- import org.elasticsearch.search.rescore.QueryRescoreMode;
- import org.elasticsearch.search.rescore.QueryRescorerBuilder;
- import org.elasticsearch.search.rescore.RescoreBuilder;
- import org.elasticsearch.search.sort.FieldSortBuilder;
- import org.elasticsearch.search.sort.SortBuilders;
- import org.elasticsearch.search.sort.SortOrder;
- import org.springframework.context.annotation.Primary;
- import org.springframework.stereotype.Service;
- import java.util.*;
- import java.util.regex.Pattern;
- import static org.elasticsearch.index.query.QueryBuilders.*;
- /**
- * @author: XuJiakai
- * 2020/11/19 14:54
- */
- @Slf4j
- @Primary
- @Service(value = "v8_fast")
- @AllArgsConstructor
- public class SearchV8FastServiceImpl implements SearchService {
- private SearchDao searchDao;
- // public static final String index = "winhc-company-v8";
- // public static final String index = "winhc-company-v8_3";
- public static final String index = "winhc-company-v8_4";
- public static final String type = "company";
- private static final String[] includes = new String[]{"cname", "legal_entity*", "estiblish_time", "reg_status_std", "company_type", "province_code", "reg_capital", "logo", "new_cid"};
- private static final FetchSourceContext fetchSourceContext = new FetchSourceContext(true, includes, null);
- private static final String[] includes_tips = new String[]{"cname.show"};
- private static final FetchSourceContext fetchSourceContext_tips = new FetchSourceContext(true, includes_tips, null);
- @Override
- public Object tips(String content) {
- BoolQueryBuilder queryBuilder = boolQuery()
- .should(matchPhrasePrefixQuery("cname.value", content))
- // .should(matchPhraseQuery("app_info", content))
- ;
- FieldSortBuilder company_score_weight = SortBuilders.fieldSort("company_score_weight").order(SortOrder.DESC);
- Object search = searchDao.search(index, type, queryBuilder, company_score_weight, null, 0, 5);
- return search;
- }
- @Override
- public Object controlGroup(String s) {
- QueryBuilder boolQuery = getBoolQuery(s, new HashSet<>());
- return searchDao.search(index, type, boolQuery, null, fetchSourceContext, 0, 10);
- }
- private List<RescoreBuilder> getReScoreBuilder(String content) {
- Map<String, Object> map = new HashMap<String, Object>(2) {{
- put("query_content", content);
- put("der", 0.85);
- }};
- QueryRescorerBuilder rescorerBuilder_0 = new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-name-term-score", map))))
- .windowSize(100)
- .setScoreMode(QueryRescoreMode.Total);
- QueryRescorerBuilder rescorerBuilder = new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-search-script_v2", map))))
- .windowSize(100)
- .setScoreMode(QueryRescoreMode.Multiply);
- QueryRescorerBuilder rescorerBuilder2 = new QueryRescorerBuilder(getRescorerBool(content))
- .windowSize(50)
- .setScoreMode(QueryRescoreMode.Total);
- return Arrays.asList(rescorerBuilder_0, rescorerBuilder, rescorerBuilder2);
- }
- @Override
- public Object query(String content, int from, int size, Set<CompanyQueryType> set, CompanySearchSortType sortType) {
- //召回
- BoolQueryBuilder boolQuery = getBoolQuery(content, set);
- if (sortType == CompanySearchSortType.注册资本_从低到高) {
- FieldSortBuilder order = SortBuilders.fieldSort("reg_capital_amount").order(SortOrder.ASC);
- Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
- return search;
- } else if (sortType == CompanySearchSortType.注册资本_从高到底) {
- FieldSortBuilder order = SortBuilders.fieldSort("reg_capital_amount").order(SortOrder.DESC);
- Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
- return search;
- } else if (sortType == CompanySearchSortType.成立日期_从早到晚) {
- FieldSortBuilder order = SortBuilders.fieldSort("estiblish_time").order(SortOrder.ASC);
- Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
- return search;
- } else if (sortType == CompanySearchSortType.成立日期_从晚到早) {
- FieldSortBuilder order = SortBuilders.fieldSort("estiblish_time").order(SortOrder.DESC);
- Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
- return search;
- } else {
- List<RescoreBuilder> reScoreBuilder = getReScoreBuilder(content);
- Object search = searchDao.search(index, type, boolQuery, reScoreBuilder, null, null, from, size);
- return search;
- }
- }
- private BoolQueryBuilder getRescorerBool(String content) {
- BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
- // boolQuery.should(disMaxQuery()
- // .add(matchQuery("holder.name", content).boost(10))
- // .add(matchQuery("staff.name", content).boost(6))
- // .tieBreaker(0.3F)
- // );
- boolQuery.should(disMaxQuery()
- .add(matchQuery("icp", content).boost(8))
- .add(matchQuery("app_info", content).boost(19))
- .add(matchQuery("company_tm", content).boost(20))
- .tieBreaker(0.5F)
- );
- return boolQuery;
- }
- private BoolQueryBuilder getBoolQuery(String content, Set<CompanyQueryType> set) {
- BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
- // boolQuery.should(termQuery("cname.value.keyword", content).boost(1000));
- if (content.length() > 3) {
- boolQuery.should(termQuery("history_name.value.keyword", content).boost(1000));
- }
- boolQuery.should(disMaxQuery()
- .add(disMaxQuery()
- .add(termQuery("legal_entity_name.keyword", content).boost(10))
- .add(termQuery("holder.name.keyword", content).boost(10F))
- .add(termQuery("staff.name.keyword", content).boost(5.5F))
- .tieBreaker(0.3F)
- ).add(disMaxQuery()
- .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<95%"))
- // .add(matchQuery("holder", content).boost(10).minimumShouldMatch("5<80%"))
- // .add(matchQuery("staff", content).boost(6).minimumShouldMatch("5<80%"))
- // .add(matchPhraseQuery("legal_entity_name", content).boost(6).slop(3))
- .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
- .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
- .tieBreaker(0.3F)
- ).tieBreaker(0.3F)
- );
- boolQuery.should(disMaxQuery()
- .add(disMaxQuery()
- .add(termQuery("icp.keyword", content).boost(20))
- .add(termQuery("app_info.keyword", content).boost(40))
- .add(termQuery("company_tm.keyword", content).boost(20))
- .tieBreaker(0.4F))
- .add(disMaxQuery()
- // .add(matchQuery("icp", content).boost(8).minimumShouldMatch("5"))
- // .add(matchQuery("app_info", content).boost(19).minimumShouldMatch("5"))
- // .add(matchQuery("company_tm", content).boost(7).minimumShouldMatch("5"))
- .add(matchPhraseQuery("icp", content).boost(8).slop(3))
- .add(matchPhraseQuery("app_info", content).boost(19).slop(3))
- .add(matchPhraseQuery("company_tm", content).boost(7).slop(3))
- .tieBreaker(0.3F)
- ).tieBreaker(0.4F)
- );
- boolQuery.should(
- disMaxQuery()
- .add(multiMatchQuery(content)
- .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
- .minimumShouldMatch("5<90%")
- .tieBreaker(0.3F)
- .field("cname.value", 16)
- .field("history_name.value", 12))
- .add(multiMatchQuery(content)
- .operator(Operator.AND)
- .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
- .tieBreaker(0.3F)
- .field("cname.value.standard", 16)
- .field("history_name.value.standard", 12))
- .tieBreaker(0.4F)
- );
- BoolQueryBuilder boolQuery2 = QueryBuilders.boolQuery()
- .filter(termQuery("deleted", "0"))
- .filter(rangeQuery("company_score_weight").gt(0.3F))
- .must(boolQuery);
- if (!set.isEmpty()) {
- BoolQueryBuilder filter = boolQuery();
- if (set.contains(CompanyQueryType.NAME)) {
- filter.should(getSpanNearQuery("cname.value.standard", content));
- filter.should(getSpanNearQuery("history_name.value.standard", content));
- }
- if (set.contains(CompanyQueryType.APP)) {
- filter.should(getSpanNearQuery("app_info.standard", content));
- }
- if (set.contains(CompanyQueryType.HOLDER_OR_STAFF)) {
- filter.should(getSpanNearQuery("holder.name.standard", content));
- filter.should(getSpanNearQuery("staff.name.standard", content));
- }
- if (set.contains(CompanyQueryType.LEGAL_REPRESENTATIVE)) {
- filter.should(getSpanNearQuery("legal_entity_name.standard", content));
- }
- boolQuery2.filter(filter);
- }
- return boolQuery2;
- }
- private static QueryBuilder getSpanNearQuery(String fields, String content) {
- SpanNearQueryBuilder spanNearQueryBuilder = spanNearQuery(spanTermQuery(fields, content.charAt(0) + ""), 1);
- for (int i = 1; i < content.length(); i++) {
- spanNearQueryBuilder.addClause(spanTermQuery(fields, content.charAt(i) + ""));
- }
- return spanNearQueryBuilder;
- }
- private BoolQueryBuilder getPersonQuery(String content) {
- BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
- boolQuery.should(termQuery("cname.value.keyword", content).boost(1000));
- boolQuery.should(termQuery("history_name.value.keyword", content).boost(1000));
- boolQuery.should(disMaxQuery()
- .add(disMaxQuery()
- .add(termQuery("legal_entity_name.keyword", content).boost(10))
- .add(termQuery("holder.name.keyword", content).boost(5.5F))
- .add(termQuery("staff.name.keyword", content).boost(5.5F))
- .tieBreaker(0.3F)
- ).add(disMaxQuery()
- .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<95%"))
- .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
- .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
- .tieBreaker(0.3F)
- )
- .boost(2f)
- .tieBreaker(0.3F)
- );
- boolQuery.should(disMaxQuery()
- .add(disMaxQuery()
- .add(termQuery("icp.keyword", content).boost(20))
- .add(termQuery("app_info.keyword", content).boost(40))
- .add(termQuery("company_tm.keyword", content).boost(20))
- .tieBreaker(0.4F))
- .add(disMaxQuery()
- .add(matchPhraseQuery("icp", content).boost(8).slop(3))
- .add(matchPhraseQuery("app_info", content).boost(19).slop(3))
- .add(matchPhraseQuery("company_tm", content).boost(7).slop(3))
- .tieBreaker(0.3F)
- )
- .boost(0.5f)
- .tieBreaker(0.1F)
- );
- boolQuery.should(
- disMaxQuery()
- .add(multiMatchQuery(content)
- .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
- .minimumShouldMatch("5<90%")
- .tieBreaker(0.3F)
- .field("cname.value", 16)
- .field("history_name.value", 12))
- .add(multiMatchQuery(content)
- .operator(Operator.AND)
- .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
- .tieBreaker(0.3F)
- .field("cname.value.standard", 16)
- .field("history_name.value.standard", 12))
- .boost(0.5f)
- .tieBreaker(0.1F)
- );
- BoolQueryBuilder boolQuery2 = QueryBuilders.boolQuery()
- .filter(termQuery("deleted", "0"))
- .filter(rangeQuery("company_score_weight").gt(0.3F))
- .must(boolQuery);
- return boolQuery2;
- }
- private static final Pattern pattern = Pattern.compile("^[a-zA-Z ]*$");
- private static boolean is_pinyin(String str) {
- return pattern.matcher(str).find();
- }
- private void se() {
- String content = "";
- boolQuery().must(multiMatchQuery(content).field("name").minimumShouldMatch("100%"));
- Script script = new Script(ScriptType.INLINE, "painless", "return doc['company_score_weight'].value;", new HashMap<>());
- boolQuery().filter(scriptQuery(script));
- }
- }
|