SearchV8FastServiceImpl.java 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. package com.winhc.phoenix.example.service.impl;
  2. import com.winhc.phoenix.example.dao.SearchDao;
  3. import com.winhc.phoenix.example.enums.CompanyQueryType;
  4. import com.winhc.phoenix.example.enums.CompanySearchSortType;
  5. import com.winhc.phoenix.example.service.SearchService;
  6. import lombok.AllArgsConstructor;
  7. import lombok.extern.slf4j.Slf4j;
  8. import org.elasticsearch.index.query.*;
  9. import org.elasticsearch.index.query.functionscore.ScriptScoreFunctionBuilder;
  10. import org.elasticsearch.script.Script;
  11. import org.elasticsearch.script.ScriptType;
  12. import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
  13. import org.elasticsearch.search.rescore.QueryRescoreMode;
  14. import org.elasticsearch.search.rescore.QueryRescorerBuilder;
  15. import org.elasticsearch.search.rescore.RescoreBuilder;
  16. import org.elasticsearch.search.sort.FieldSortBuilder;
  17. import org.elasticsearch.search.sort.SortBuilders;
  18. import org.elasticsearch.search.sort.SortOrder;
  19. import org.springframework.context.annotation.Primary;
  20. import org.springframework.stereotype.Service;
  21. import java.util.*;
  22. import java.util.regex.Pattern;
  23. import static org.elasticsearch.index.query.QueryBuilders.*;
  24. /**
  25. * @author: XuJiakai
  26. * 2020/11/19 14:54
  27. */
  28. @Slf4j
  29. @Primary
  30. @Service(value = "v8_fast")
  31. @AllArgsConstructor
  32. public class SearchV8FastServiceImpl implements SearchService {
  33. private SearchDao searchDao;
  34. // public static final String index = "winhc-company-v8";
  35. // public static final String index = "winhc-company-v8_3";
  36. public static final String index = "winhc-company-v8_4";
  37. public static final String type = "company";
  38. private static final String[] includes = new String[]{"cname", "legal_entity*", "estiblish_time", "reg_status_std", "company_type", "province_code", "reg_capital", "logo", "new_cid"};
  39. private static final FetchSourceContext fetchSourceContext = new FetchSourceContext(true, includes, null);
  40. private static final String[] includes_tips = new String[]{"cname.show"};
  41. private static final FetchSourceContext fetchSourceContext_tips = new FetchSourceContext(true, includes_tips, null);
  42. @Override
  43. public Object tips(String content) {
  44. BoolQueryBuilder queryBuilder = boolQuery()
  45. .should(matchPhrasePrefixQuery("cname.value", content))
  46. // .should(matchPhraseQuery("app_info", content))
  47. ;
  48. FieldSortBuilder company_score_weight = SortBuilders.fieldSort("company_score_weight").order(SortOrder.DESC);
  49. Object search = searchDao.search(index, type, queryBuilder, company_score_weight, null, 0, 5);
  50. return search;
  51. }
  52. @Override
  53. public Object controlGroup(String s) {
  54. QueryBuilder boolQuery = getBoolQuery(s, new HashSet<>());
  55. return searchDao.search(index, type, boolQuery, null, fetchSourceContext, 0, 10);
  56. }
  57. private List<RescoreBuilder> getReScoreBuilder(String content) {
  58. Map<String, Object> map = new HashMap<String, Object>(2) {{
  59. put("query_content", content);
  60. put("der", 0.85);
  61. }};
  62. QueryRescorerBuilder rescorerBuilder_0 = new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-name-term-score", map))))
  63. .windowSize(100)
  64. .setScoreMode(QueryRescoreMode.Total);
  65. QueryRescorerBuilder rescorerBuilder = new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(new Script(ScriptType.STORED, null, "company-search-script_v2", map))))
  66. .windowSize(100)
  67. .setScoreMode(QueryRescoreMode.Multiply);
  68. QueryRescorerBuilder rescorerBuilder2 = new QueryRescorerBuilder(getRescorerBool(content))
  69. .windowSize(50)
  70. .setScoreMode(QueryRescoreMode.Total);
  71. return Arrays.asList(rescorerBuilder_0, rescorerBuilder, rescorerBuilder2);
  72. }
  73. @Override
  74. public Object query(String content, int from, int size, Set<CompanyQueryType> set, CompanySearchSortType sortType) {
  75. //召回
  76. BoolQueryBuilder boolQuery = getBoolQuery(content, set);
  77. if (sortType == CompanySearchSortType.注册资本_从低到高) {
  78. FieldSortBuilder order = SortBuilders.fieldSort("reg_capital_amount").order(SortOrder.ASC);
  79. Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
  80. return search;
  81. } else if (sortType == CompanySearchSortType.注册资本_从高到底) {
  82. FieldSortBuilder order = SortBuilders.fieldSort("reg_capital_amount").order(SortOrder.DESC);
  83. Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
  84. return search;
  85. } else if (sortType == CompanySearchSortType.成立日期_从早到晚) {
  86. FieldSortBuilder order = SortBuilders.fieldSort("estiblish_time").order(SortOrder.ASC);
  87. Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
  88. return search;
  89. } else if (sortType == CompanySearchSortType.成立日期_从晚到早) {
  90. FieldSortBuilder order = SortBuilders.fieldSort("estiblish_time").order(SortOrder.DESC);
  91. Object search = searchDao.search(index, type, boolQuery, null, order, null, from, size);
  92. return search;
  93. } else {
  94. List<RescoreBuilder> reScoreBuilder = getReScoreBuilder(content);
  95. Object search = searchDao.search(index, type, boolQuery, reScoreBuilder, null, null, from, size);
  96. return search;
  97. }
  98. }
  99. private BoolQueryBuilder getRescorerBool(String content) {
  100. BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
  101. // boolQuery.should(disMaxQuery()
  102. // .add(matchQuery("holder.name", content).boost(10))
  103. // .add(matchQuery("staff.name", content).boost(6))
  104. // .tieBreaker(0.3F)
  105. // );
  106. boolQuery.should(disMaxQuery()
  107. .add(matchQuery("icp", content).boost(8))
  108. .add(matchQuery("app_info", content).boost(19))
  109. .add(matchQuery("company_tm", content).boost(20))
  110. .tieBreaker(0.5F)
  111. );
  112. return boolQuery;
  113. }
  114. private BoolQueryBuilder getBoolQuery(String content, Set<CompanyQueryType> set) {
  115. BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
  116. // boolQuery.should(termQuery("cname.value.keyword", content).boost(1000));
  117. if (content.length() > 3) {
  118. boolQuery.should(termQuery("history_name.value.keyword", content).boost(1000));
  119. }
  120. boolQuery.should(disMaxQuery()
  121. .add(disMaxQuery()
  122. .add(termQuery("legal_entity_name.keyword", content).boost(10))
  123. .add(termQuery("holder.name.keyword", content).boost(10F))
  124. .add(termQuery("staff.name.keyword", content).boost(5.5F))
  125. .tieBreaker(0.3F)
  126. ).add(disMaxQuery()
  127. .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<95%"))
  128. // .add(matchQuery("holder", content).boost(10).minimumShouldMatch("5<80%"))
  129. // .add(matchQuery("staff", content).boost(6).minimumShouldMatch("5<80%"))
  130. // .add(matchPhraseQuery("legal_entity_name", content).boost(6).slop(3))
  131. .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
  132. .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
  133. .tieBreaker(0.3F)
  134. ).tieBreaker(0.3F)
  135. );
  136. boolQuery.should(disMaxQuery()
  137. .add(disMaxQuery()
  138. .add(termQuery("icp.keyword", content).boost(20))
  139. .add(termQuery("app_info.keyword", content).boost(40))
  140. .add(termQuery("company_tm.keyword", content).boost(20))
  141. .tieBreaker(0.4F))
  142. .add(disMaxQuery()
  143. // .add(matchQuery("icp", content).boost(8).minimumShouldMatch("5"))
  144. // .add(matchQuery("app_info", content).boost(19).minimumShouldMatch("5"))
  145. // .add(matchQuery("company_tm", content).boost(7).minimumShouldMatch("5"))
  146. .add(matchPhraseQuery("icp", content).boost(8).slop(3))
  147. .add(matchPhraseQuery("app_info", content).boost(19).slop(3))
  148. .add(matchPhraseQuery("company_tm", content).boost(7).slop(3))
  149. .tieBreaker(0.3F)
  150. ).tieBreaker(0.4F)
  151. );
  152. boolQuery.should(
  153. disMaxQuery()
  154. .add(multiMatchQuery(content)
  155. .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
  156. .minimumShouldMatch("5<90%")
  157. .tieBreaker(0.3F)
  158. .field("cname.value", 16)
  159. .field("history_name.value", 12))
  160. .add(multiMatchQuery(content)
  161. .operator(Operator.AND)
  162. .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
  163. .tieBreaker(0.3F)
  164. .field("cname.value.standard", 16)
  165. .field("history_name.value.standard", 12))
  166. .tieBreaker(0.4F)
  167. );
  168. BoolQueryBuilder boolQuery2 = QueryBuilders.boolQuery()
  169. .filter(termQuery("deleted", "0"))
  170. .filter(rangeQuery("company_score_weight").gt(0.3F))
  171. .must(boolQuery);
  172. if (!set.isEmpty()) {
  173. BoolQueryBuilder filter = boolQuery();
  174. if (set.contains(CompanyQueryType.NAME)) {
  175. filter.should(getSpanNearQuery("cname.value.standard", content));
  176. filter.should(getSpanNearQuery("history_name.value.standard", content));
  177. }
  178. if (set.contains(CompanyQueryType.APP)) {
  179. filter.should(getSpanNearQuery("app_info.standard", content));
  180. }
  181. if (set.contains(CompanyQueryType.HOLDER_OR_STAFF)) {
  182. filter.should(getSpanNearQuery("holder.name.standard", content));
  183. filter.should(getSpanNearQuery("staff.name.standard", content));
  184. }
  185. if (set.contains(CompanyQueryType.LEGAL_REPRESENTATIVE)) {
  186. filter.should(getSpanNearQuery("legal_entity_name.standard", content));
  187. }
  188. boolQuery2.filter(filter);
  189. }
  190. return boolQuery2;
  191. }
  192. private static QueryBuilder getSpanNearQuery(String fields, String content) {
  193. SpanNearQueryBuilder spanNearQueryBuilder = spanNearQuery(spanTermQuery(fields, content.charAt(0) + ""), 1);
  194. for (int i = 1; i < content.length(); i++) {
  195. spanNearQueryBuilder.addClause(spanTermQuery(fields, content.charAt(i) + ""));
  196. }
  197. return spanNearQueryBuilder;
  198. }
  199. private BoolQueryBuilder getPersonQuery(String content) {
  200. BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
  201. boolQuery.should(termQuery("cname.value.keyword", content).boost(1000));
  202. boolQuery.should(termQuery("history_name.value.keyword", content).boost(1000));
  203. boolQuery.should(disMaxQuery()
  204. .add(disMaxQuery()
  205. .add(termQuery("legal_entity_name.keyword", content).boost(10))
  206. .add(termQuery("holder.name.keyword", content).boost(5.5F))
  207. .add(termQuery("staff.name.keyword", content).boost(5.5F))
  208. .tieBreaker(0.3F)
  209. ).add(disMaxQuery()
  210. .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<95%"))
  211. .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
  212. .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
  213. .tieBreaker(0.3F)
  214. )
  215. .boost(2f)
  216. .tieBreaker(0.3F)
  217. );
  218. boolQuery.should(disMaxQuery()
  219. .add(disMaxQuery()
  220. .add(termQuery("icp.keyword", content).boost(20))
  221. .add(termQuery("app_info.keyword", content).boost(40))
  222. .add(termQuery("company_tm.keyword", content).boost(20))
  223. .tieBreaker(0.4F))
  224. .add(disMaxQuery()
  225. .add(matchPhraseQuery("icp", content).boost(8).slop(3))
  226. .add(matchPhraseQuery("app_info", content).boost(19).slop(3))
  227. .add(matchPhraseQuery("company_tm", content).boost(7).slop(3))
  228. .tieBreaker(0.3F)
  229. )
  230. .boost(0.5f)
  231. .tieBreaker(0.1F)
  232. );
  233. boolQuery.should(
  234. disMaxQuery()
  235. .add(multiMatchQuery(content)
  236. .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
  237. .minimumShouldMatch("5<90%")
  238. .tieBreaker(0.3F)
  239. .field("cname.value", 16)
  240. .field("history_name.value", 12))
  241. .add(multiMatchQuery(content)
  242. .operator(Operator.AND)
  243. .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
  244. .tieBreaker(0.3F)
  245. .field("cname.value.standard", 16)
  246. .field("history_name.value.standard", 12))
  247. .boost(0.5f)
  248. .tieBreaker(0.1F)
  249. );
  250. BoolQueryBuilder boolQuery2 = QueryBuilders.boolQuery()
  251. .filter(termQuery("deleted", "0"))
  252. .filter(rangeQuery("company_score_weight").gt(0.3F))
  253. .must(boolQuery);
  254. return boolQuery2;
  255. }
  256. private static final Pattern pattern = Pattern.compile("^[a-zA-Z ]*$");
  257. private static boolean is_pinyin(String str) {
  258. return pattern.matcher(str).find();
  259. }
  260. private void se() {
  261. String content = "";
  262. boolQuery().must(multiMatchQuery(content).field("name").minimumShouldMatch("100%"));
  263. Script script = new Script(ScriptType.INLINE, "painless", "return doc['company_score_weight'].value;", new HashMap<>());
  264. boolQuery().filter(scriptQuery(script));
  265. }
  266. }