Browse Source

es定时删除任务

xufei 3 năm trước cách đây
mục cha
commit
c146fe0dcf
27 tập tin đã thay đổi với 811 bổ sung và 2091 xóa
  1. 17 28
      pom.xml
  2. 91 91
      src/main/java/com/winhc/task/configuration/ElasticSearchConfiguration.java
  3. 35 0
      src/main/java/com/winhc/task/configuration/MultiESSTartConfigurer.java
  4. 0 24
      src/main/java/com/winhc/task/dao/SearchDao.java
  5. 0 122
      src/main/java/com/winhc/task/dao/impl/SearchDaoImpl.java
  6. 4 3
      src/main/java/com/winhc/task/framework/es/EsFastScan.java
  7. 0 123
      src/main/java/com/winhc/task/job/BatchQueryEsTest.java
  8. 159 159
      src/main/java/com/winhc/task/job/CalcSummaryJob.java
  9. 0 42
      src/main/java/com/winhc/task/job/DeleteHbaseByMongoJob.java
  10. 0 153
      src/main/java/com/winhc/task/job/EsIndexJobs.java
  11. 0 70
      src/main/java/com/winhc/task/job/EsQueryAggSumJobs.java
  12. 0 85
      src/main/java/com/winhc/task/job/EsQueryListJobs.java
  13. 0 104
      src/main/java/com/winhc/task/job/EsScanJobSumAgg.java
  14. 0 115
      src/main/java/com/winhc/task/job/EsScanJobSumAggPlus.java
  15. 149 0
      src/main/java/com/winhc/task/run/CleanDataTask.java
  16. 0 53
      src/main/java/com/winhc/task/service/impl/InnerSearchCompanyServiceImpl.java
  17. 0 123
      src/main/java/com/winhc/task/service/impl/SearchV7ServiceImpl.java
  18. 0 232
      src/main/java/com/winhc/task/service/impl/SearchV8FastServiceImpl.java
  19. 0 255
      src/main/java/com/winhc/task/service/impl/SearchV8ServiceImpl.java
  20. 0 119
      src/main/java/com/winhc/task/service/impl/SearchV8SimpServiceImpl.java
  21. 31 2
      src/main/java/com/winhc/task/util/BaseUtils.java
  22. 45 0
      src/main/java/com/winhc/task/util/ESUtils.java
  23. 82 0
      src/main/resources/application-dev.yml
  24. 104 0
      src/main/resources/application-prod.yml
  25. 2 74
      src/main/resources/application.yml
  26. 92 92
      src/test/java/com/winhc/task/DataWorksSummaryJob.java
  27. 0 22
      src/test/java/com/winhc/task/EsTest.java

+ 17 - 28
pom.xml

@@ -25,28 +25,6 @@
     </properties>
 
     <dependencies>
-        <dependency>
-            <groupId>org.elasticsearch</groupId>
-            <artifactId>elasticsearch</artifactId>
-            <version>5.6.0</version>
-        </dependency>
-        <dependency>
-            <groupId>org.elasticsearch.client</groupId>
-            <artifactId>elasticsearch-rest-client</artifactId>
-            <version>5.6.0</version>
-        </dependency>
-        <dependency>
-            <groupId>org.elasticsearch.client</groupId>
-            <artifactId>elasticsearch-rest-high-level-client</artifactId>
-            <version>5.6.0</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.elasticsearch.client</groupId>
-                    <artifactId>elasticsearch-rest-client</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
 
         <dependency>
             <groupId>org.springframework.boot</groupId>
@@ -169,12 +147,6 @@
             <artifactId>guava</artifactId>
             <version>29.0-jre</version>
         </dependency>
-
-        <dependency>
-            <groupId>org.elasticsearch.client</groupId>
-            <artifactId>rest</artifactId>
-            <version>5.5.3</version>
-        </dependency>
         <dependency>
             <groupId>org.apache.logging.log4j</groupId>
             <artifactId>log4j-core</artifactId>
@@ -229,6 +201,23 @@
             <artifactId>hutool-all</artifactId>
             <version>5.3.7</version>
         </dependency>
+
+
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.bbossgroups.plugins</groupId>
+            <artifactId>bboss-elasticsearch-rest-jdbc</artifactId>
+            <version>6.3.9</version>
+        </dependency>
+        <dependency>
+            <groupId>com.bbossgroups.plugins</groupId>
+            <artifactId>bboss-elasticsearch-spring-boot-starter</artifactId>
+            <version>6.3.9</version>
+        </dependency>
+
     </dependencies>
 
     <build>

+ 91 - 91
src/main/java/com/winhc/task/configuration/ElasticSearchConfiguration.java

@@ -1,91 +1,91 @@
-package com.winhc.task.configuration;
-
-import org.apache.http.HttpHost;
-import org.apache.http.auth.AuthScope;
-import org.apache.http.auth.UsernamePasswordCredentials;
-import org.apache.http.client.CredentialsProvider;
-import org.apache.http.impl.client.BasicCredentialsProvider;
-import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
-import org.elasticsearch.client.RestClient;
-import org.elasticsearch.client.RestClientBuilder;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Configuration;
-
-import java.util.stream.Stream;
-
-/**
- * @Author: XuJiakai
- * @Date: 2020/9/15 20:01
- * @Description:
- */
-@Configuration
-public class ElasticSearchConfiguration {
-    @Value("${es.username}")
-    private String username;
-    @Value("${es.password}")
-    private String password;
-    @Value("${es.host}")
-    private String host;
-
-    @Value("${es.schema:http}")
-    String schema;
-    @Value(value = "${es.connect-timeout:100000}")
-    String connectTimeout;
-    @Value(value = "${es.socket-timeout:600000}")
-    String socketTimeout;
-    @Value(value = "${es.connection-request-timeout:50000}")
-    String connectionRequestTimeout;
-    @Value(value = "${es.max-conn-total:100}")
-    String maxConnTotal;
-    @Value(value = "${es.max-conn-per-route:100}")
-    String maxConnPerRoute;
-
-    @Bean
-    public RestClient bean() {
-        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
-        credentialsProvider.setCredentials(AuthScope.ANY,
-                new UsernamePasswordCredentials(username, password));
-        // 单击所创建的Elasticsearch实例ID,在基本信息页面获取公网地址,即为HOST。
-        return RestClient.builder(new HttpHost(host, 9200))
-                .setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
-                    @Override
-                    public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
-                        return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
-                    }
-                }).build();
-    }
-
-
-    @Bean
-    public RestHighLevelClient getClient() {
-        HttpHost[] httpHosts = Stream.of(host.split(",")).map(host -> {
-            String[] split = host.split(":");
-            return new HttpHost(split[0], 9200, schema);
-        }).toArray(HttpHost[]::new);
-
-        // 阿里云Elasticsearch集群需要basic auth验证。
-        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
-        //访问用户名和密码为您创建阿里云Elasticsearch实例时设置的用户名和密码,也是Kibana控制台的登录用户名和密码。
-        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password));
-
-
-        return new RestHighLevelClient(RestClient
-                .builder(httpHosts)
-                .setMaxRetryTimeoutMillis(60000 * 3)
-                .setRequestConfigCallback(builder -> {
-                    builder.setConnectTimeout(1000 * 3);
-                    builder.setSocketTimeout(60000 * 3);
-                    builder.setConnectionRequestTimeout(0);
-                    return builder;
-                })
-                .setHttpClientConfigCallback(httpAsyncClientBuilder -> {
-                    httpAsyncClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
-                    httpAsyncClientBuilder.setMaxConnTotal(100 * 3);
-                    httpAsyncClientBuilder.setMaxConnPerRoute(100 * 3);
-                    return httpAsyncClientBuilder;
-                }).build()
-        );
-    }
-}
+//package com.winhc.task.configuration;
+//
+//import org.apache.http.HttpHost;
+//import org.apache.http.auth.AuthScope;
+//import org.apache.http.auth.UsernamePasswordCredentials;
+//import org.apache.http.client.CredentialsProvider;
+//import org.apache.http.impl.client.BasicCredentialsProvider;
+//import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
+//import org.elasticsearch.client.RestClient;
+//import org.elasticsearch.client.RestClientBuilder;
+//import org.elasticsearch.client.RestHighLevelClient;
+//import org.springframework.beans.factory.annotation.Value;
+//import org.springframework.context.annotation.Bean;
+//import org.springframework.context.annotation.Configuration;
+//
+//import java.util.stream.Stream;
+//
+///**
+// * @Author: XuJiakai
+// * @Date: 2020/9/15 20:01
+// * @Description:
+// */
+//@Configuration
+//public class ElasticSearchConfiguration {
+//    @Value("${es.username}")
+//    private String username;
+//    @Value("${es.password}")
+//    private String password;
+//    @Value("${es.host}")
+//    private String host;
+//
+//    @Value("${es.schema:http}")
+//    String schema;
+//    @Value(value = "${es.connect-timeout:100000}")
+//    String connectTimeout;
+//    @Value(value = "${es.socket-timeout:600000}")
+//    String socketTimeout;
+//    @Value(value = "${es.connection-request-timeout:50000}")
+//    String connectionRequestTimeout;
+//    @Value(value = "${es.max-conn-total:100}")
+//    String maxConnTotal;
+//    @Value(value = "${es.max-conn-per-route:100}")
+//    String maxConnPerRoute;
+//
+//    @Bean
+//    public RestClient bean() {
+//        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
+//        credentialsProvider.setCredentials(AuthScope.ANY,
+//                new UsernamePasswordCredentials(username, password));
+//        // 单击所创建的Elasticsearch实例ID,在基本信息页面获取公网地址,即为HOST。
+//        return RestClient.builder(new HttpHost(host, 9200))
+//                .setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
+//                    @Override
+//                    public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
+//                        return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
+//                    }
+//                }).build();
+//    }
+//
+//
+//    @Bean
+//    public RestHighLevelClient getClient() {
+//        HttpHost[] httpHosts = Stream.of(host.split(",")).map(host -> {
+//            String[] split = host.split(":");
+//            return new HttpHost(split[0], 9200, schema);
+//        }).toArray(HttpHost[]::new);
+//
+//        // 阿里云Elasticsearch集群需要basic auth验证。
+//        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
+//        //访问用户名和密码为您创建阿里云Elasticsearch实例时设置的用户名和密码,也是Kibana控制台的登录用户名和密码。
+//        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password));
+//
+//
+//        return new RestHighLevelClient(RestClient
+//                .builder(httpHosts)
+//                .setMaxRetryTimeoutMillis(60000 * 3)
+//                .setRequestConfigCallback(builder -> {
+//                    builder.setConnectTimeout(1000 * 3);
+//                    builder.setSocketTimeout(60000 * 3);
+//                    builder.setConnectionRequestTimeout(0);
+//                    return builder;
+//                })
+//                .setHttpClientConfigCallback(httpAsyncClientBuilder -> {
+//                    httpAsyncClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
+//                    httpAsyncClientBuilder.setMaxConnTotal(100 * 3);
+//                    httpAsyncClientBuilder.setMaxConnPerRoute(100 * 3);
+//                    return httpAsyncClientBuilder;
+//                }).build()
+//        );
+//    }
+//}

+ 35 - 0
src/main/java/com/winhc/task/configuration/MultiESSTartConfigurer.java

@@ -0,0 +1,35 @@
+package com.winhc.task.configuration;
+
+import org.frameworkset.elasticsearch.boot.BBossESStarter;
+import org.frameworkset.elasticsearch.client.ClientInterface;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Primary;
+
+/**
+ * Configures multiple ES clusters: one bboss starter bean per cluster (es6 and es5).
+ * Specifies the multi-ES data source profile: multi-datasource
+ */
+@Configuration
+public class MultiESSTartConfigurer {
+    @Primary // preferred BBossESStarter candidate wherever an injection point does not name a bean
+    @Bean(initMethod = "start") // Spring invokes start() on the starter after creating it
+    @ConfigurationProperties("spring.elasticsearch.bboss.es6") // bound from the es6 property prefix
+    public BBossESStarter bbossESStarterEs6(){
+        return new BBossESStarter();
+    }
+
+    @Bean(initMethod = "start") // Spring invokes start() on the starter after creating it
+    @ConfigurationProperties("spring.elasticsearch.bboss.es5") // bound from the es5 property prefix
+    public BBossESStarter bbossESStarterEs5(){
+        return new BBossESStarter();
+    }
+
+    // Exposes a REST client bean; the unqualified BBossESStarter parameter resolves to the @Primary (es6) starter.
+    @Bean
+    public ClientInterface bbossESClient(BBossESStarter bBossESStarter) {
+        return bBossESStarter.getRestClient();
+    }
+
+}

+ 0 - 24
src/main/java/com/winhc/task/dao/SearchDao.java

@@ -1,24 +0,0 @@
-package com.winhc.task.dao;
-
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
-import org.elasticsearch.search.rescore.RescoreBuilder;
-import org.elasticsearch.search.sort.SortBuilder;
-
-import java.util.List;
-
-/**
- * @author: XuJiakai
- * 2020/11/20 15:15
- */
-public interface SearchDao {
-
-    Object search(String index, String type, QueryBuilder query, SortBuilder sortBuilder, FetchSourceContext fetchSourceContext, int from, int size);
-
-    Object search(String index, String type, QueryBuilder query, int from, int size);
-
-    Object search(String index, String type, QueryBuilder query, RescoreBuilder rescoreBuilder, SortBuilder sortBuilder, FetchSourceContext fetchSourceContext, int from, int size);
-
-    boolean deleteByIds(String index, String type, List<String> ids);
-
-}

+ 0 - 122
src/main/java/com/winhc/task/dao/impl/SearchDaoImpl.java

@@ -1,122 +0,0 @@
-package com.winhc.task.dao.impl;
-
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.winhc.task.dao.SearchDao;
-import lombok.AllArgsConstructor;
-import lombok.SneakyThrows;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.http.util.EntityUtils;
-import org.elasticsearch.action.bulk.BulkRequest;
-import org.elasticsearch.action.delete.DeleteRequest;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.Response;
-import org.elasticsearch.client.RestClient;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
-import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
-import org.elasticsearch.search.rescore.RescoreBuilder;
-import org.elasticsearch.search.sort.SortBuilder;
-import org.springframework.stereotype.Repository;
-
-import java.util.HashMap;
-import java.util.List;
-
-/**
- * @author: XuJiakai
- * 2020/11/20 15:16
- */
-@Slf4j
-@Repository
-@AllArgsConstructor
-public class SearchDaoImpl implements SearchDao {
-    private RestHighLevelClient restHighLevelClient;
-
-
-    private ObjectMapper mapper;
-    private static final TypeReference<HashMap<String, Object>> typeRef
-            = new TypeReference<HashMap<String, Object>>() {
-    };
-
-    @SneakyThrows
-    @Override
-    public Object search(String index, String type, QueryBuilder query, SortBuilder sortBuilder, FetchSourceContext fetchSourceContext, int from, int size) {
-        return search(index, type, query, null, sortBuilder, fetchSourceContext, from, size);
-    }
-
-    @Override
-    public Object search(String index, String type, QueryBuilder query, int from, int size) {
-        return search(index, type, query, null, null, from, size);
-    }
-
-    @SneakyThrows
-    @Override
-    public Object search(String index, String type, QueryBuilder query, RescoreBuilder rescoreBuilder, SortBuilder sortBuilder, FetchSourceContext fetchSourceContext, int from, int size) {
-      /*  HighlightBuilder.Field query1 = new HighlightBuilder.Field("history_name.value").highlightQuery(QueryBuilders.matchQuery("history_name.value", "华为"));
-        HighlightBuilder.Field query3 = new HighlightBuilder.Field("history_name.value.keyword").highlightQuery(QueryBuilders.matchQuery("history_name.value", "华为").boost(10000));
-        HighlightBuilder.Field query2 = new HighlightBuilder
-                .Field("cname.value")
-                .highlightQuery(QueryBuilders.matchQuery("cname.value", "华为").boost(1000));*/
-        HighlightBuilder highlightBuilder = new HighlightBuilder()
-//                .field(query1)
-//                .field(query2)
-//                .field(query3)
-                .preTags("<font color='red'>")
-                .postTags("</font>")
-                .order("score");
-        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
-                .query(query)
-                .from(from)
-                .size(size)
-                .highlighter(highlightBuilder);
-
-        if (rescoreBuilder != null) {
-            searchSourceBuilder.addRescorer(rescoreBuilder)
-//                    .addRescorer(new QueryRescorerBuilder(query))
-            ;
-        }
-        if (fetchSourceContext != null) {
-            searchSourceBuilder.fetchSource(fetchSourceContext);
-        }
-        if (sortBuilder != null) {
-            searchSourceBuilder.sort(sortBuilder)
-//                    .trackScores(true)
-            ;
-        }
-
-        SearchRequest searchRequest = new SearchRequest()
-                .indices(index)
-                .types(type)
-                .source(searchSourceBuilder);
-
-        SearchResponse search = restHighLevelClient.search(searchRequest);
-        return mapper.readValue(search.toString(), typeRef);
-    }
-
-    @SneakyThrows
-    @Override
-    public boolean deleteByIds(String index, String type, List<String> ids) {
-        BulkRequest bulkRequest = new BulkRequest();
-        for (String id : ids) {
-            DeleteRequest deleteRequest = new DeleteRequest(index, type, id);
-            bulkRequest.add(deleteRequest);
-        }
-        restHighLevelClient.bulk(bulkRequest);
-        return true;
-    }
-
-
-    private final RestClient restClient;
-
-    @SneakyThrows
-    public Object test() {
-        Response get = restClient.performRequest("get", "_cat/indices/judicial_case_v*?h=index");
-        String s = EntityUtils.toString(get.getEntity());
-        System.out.println(s);
-
-        return null;
-    }
-}

+ 4 - 3
src/main/java/com/winhc/task/framework/es/EsFastScan.java

@@ -9,6 +9,7 @@ import org.apache.commons.lang3.StringUtils;
 import org.elasticsearch.action.search.SearchRequest;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.action.search.SearchScrollRequest;
+import org.elasticsearch.client.RequestOptions;
 import org.elasticsearch.client.RestHighLevelClient;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.index.query.WrapperQueryBuilder;
@@ -102,9 +103,9 @@ public class EsFastScan {
         }
 
         searchRequest.source(searchSourceBuilder);
-        SearchResponse searchResponse = client.search(searchRequest);
+        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
 
-        totalHits = searchResponse.getHits().getTotalHits();
+        totalHits = searchResponse.getHits().getTotalHits().value;
         log.info("es scan start shard total hits: {}", totalHits);
         int n = searchResponse.getHits().getHits().length;
         if (n != 0) {
@@ -135,7 +136,7 @@ public class EsFastScan {
     @SneakyThrows
     private int scanData(ThreadPoolExecutor executorService, String scrollId) {
         SearchScrollRequest scroll = new SearchScrollRequest(scrollId).scroll(this.scroll);
-        SearchResponse searchResponse = client.searchScroll(scroll);
+        SearchResponse searchResponse = client.searchScroll(scroll,RequestOptions.DEFAULT);
         int num = searchResponse.getHits().getHits().length;
         if (num == 0) {
             return num;

+ 0 - 123
src/main/java/com/winhc/task/job/BatchQueryEsTest.java

@@ -1,123 +0,0 @@
-package com.winhc.task.job;
-
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.mongodb.client.MongoCollection;
-import lombok.AllArgsConstructor;
-import lombok.SneakyThrows;
-import lombok.extern.slf4j.Slf4j;
-import org.bson.Document;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.aggregations.AggregationBuilders;
-import org.elasticsearch.search.aggregations.metrics.sum.SumAggregationBuilder;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-import org.springframework.data.mongodb.core.MongoTemplate;
-import org.springframework.stereotype.Component;
-import java.util.*;
-import static com.winhc.task.util.MaxBatchQueryUtils.addTerms;
-
-/**
- * @author: XuJiakai
- * 2021/3/1 09:43
- */
-@Slf4j
-@Component
-@AllArgsConstructor
-public class BatchQueryEsTest {
-
-    private ObjectMapper mapper;
-    private RestHighLevelClient restHighLevelClient;
-    private final MongoTemplate mongoTemplate;
-
-    private static final TypeReference<HashMap<String, Object>> typeRef
-            = new TypeReference<HashMap<String, Object>>() {
-    };
-
-    public void start(int batchSize, int round) {
-        MongoCollection<Document> company_id = mongoTemplate.getCollection("xjk_test_es_company_id");
-        Set<String> set = new HashSet<String>();
-
-        for (Document doc : company_id.find().skip(batchSize * (round - 1)).batchSize(batchSize)) {
-            String id = doc.getString("_id");
-            set.add(id);
-
-            if (set.size() >= batchSize) {
-                break;
-            }
-        }
-
-        Object query = query2(set);
-        System.out.println();
-        System.out.println(query);
-        System.out.println();
-    }
-
-
-    @SneakyThrows
-    private Object query2(Set<String> ids) {
-        log.info("start... size: {}", ids.size());
-
-        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
-        boolQuery.filter(addTerms(ids, "_id"));
-
-        List<SumAggregationBuilder> list = Arrays.asList(
-                AggregationBuilders.sum("open_announcement_1").field("summary.company_court_open_announcement_deleted_0_defendant")
-//                , AggregationBuilders.sum("open_announcement_2").field("summary.company_court_open_announcement_deleted_0_plaintiff")
-//                ,
-//                AggregationBuilders.sum("3").field("summary.company_app_info_del_1")
-//                , AggregationBuilders.sum("4").field("summary.company_staff_del_1")
-//                , AggregationBuilders.sum("5").field("summary.company_icp_del_0")
-//                , AggregationBuilders.sum("6").field("summary.company_tm_del_1")
-        );
-
-
-        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
-                .query(boolQuery)
-                .size(0);
-
-        for (SumAggregationBuilder builder : list) {
-            searchSourceBuilder.aggregation(builder);
-        }
-
-
-        SearchRequest searchRequest = new SearchRequest()
-                .indices("out_es_summary_v1")
-                .types("doc")
-                .source(searchSourceBuilder);
-        log.info("start!");
-        SearchResponse search = restHighLevelClient.search(searchRequest);
-        log.info("end!");
-        return mapper.readValue(search.toString(), typeRef);
-    }
-
-
-    @SneakyThrows
-    private Object query(Set<String> ids) {
-        log.info("start... size: {}", ids.size());
-
-        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
-        boolQuery.must(addTerms(ids, "defendant_info.litigant_id"));
-        boolQuery.must(addTerms(ids, "plaintiff_info.litigant_id"));
-        boolQuery.filter(QueryBuilders.termQuery("deleted", "0"));
-
-        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
-                .query(boolQuery)
-                .size(0);
-
-        SearchRequest searchRequest = new SearchRequest()
-                .indices("winhc_index_company_court_open_announcement_v1")
-                .types("doc")
-                .source(searchSourceBuilder);
-        log.info("start!");
-        SearchResponse search = restHighLevelClient.search(searchRequest);
-        log.info("end!");
-        return mapper.readValue(search.toString(), typeRef);
-    }
-
-}

+ 159 - 159
src/main/java/com/winhc/task/job/CalcSummaryJob.java

@@ -1,159 +1,159 @@
-package com.winhc.task.job;
-
-import cn.hutool.core.lang.Tuple;
-import com.alibaba.hologres.client.HoloClient;
-import com.alibaba.hologres.client.exception.HoloClientException;
-import com.winhc.task.bean.Alias;
-import com.winhc.task.bean.JobArgs;
-import com.winhc.task.common.Constant;
-import com.winhc.task.common.SummaryArgs;
-import com.winhc.task.util.FreeMarkUtil;
-import com.winhc.task.util.HoloUtils;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.lang3.StringUtils;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.stereotype.Service;
-
-import java.io.IOException;
-import java.util.*;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-@Slf4j
-@Service
-public class CalcSummaryJob {
-    @Autowired
-    SingleSynHoloJob holoSynJobs;
-    @Autowired
-    MultipleSynHoloJob multipleSynHoloJob;
-    @Autowired
-    EsIndexJobs esIndexJobs;
-    @Autowired
-    FreeMarkUtil freeMarkUtil;
-
-    public void start(List<String> list, JobArgs jobArgs, Boolean synData) throws HoloClientException, IOException {
-
-        String summary_company = "ng_rt_summary_company";
-        String summary_person = "ng_rt_summary_person";
-
-        ArrayList<String> tnList = new ArrayList<>();
-        //初始化sql
-        List<Tuple> collect = initSql(list, jobArgs, tnList);
-        //计算摘要
-        if (synData) {
-            calcSummary(jobArgs, collect);
-        }
-        //建索引
-        createIndex(jobArgs, tnList);
-        //同步数据
-        if (synData) {
-            multipleSynHoloJob.start(tnList, jobArgs);
-        }
-        //切换个人,公司索引到生产
-        //switch_index(jobArgs, summary_company, summary_person, tnList);
-
-    }
-
-    private void switch_index(JobArgs jobArgs, String summary_company, String summary_person, ArrayList<String> tnList) throws IOException {
-        for (String tn : tnList) {
-            if (Constant.PERSON_SUMMARYS.contains(tn)) {
-                switchIndex(jobArgs.getTargetIndexPrefix(), summary_person, "person", jobArgs.getTargetIndexSuffix(), tn);
-            }
-            switchIndex(jobArgs.getTargetIndexPrefix(), summary_company, "", jobArgs.getTargetIndexSuffix(), tn);
-        }
-    }
-
-    private void createIndex(JobArgs jobArgs, ArrayList<String> tnList) {
-        for (String tn : tnList) {
-            try {
-                log.info("start create index tn : {} ", tn);
-                if (Constant.PERSON_SUMMARYS.contains(tn)) {
-                    tranIndex(jobArgs, tn, "person");
-                }
-                tranIndex(jobArgs, tn, "");
-            } catch (Exception e) {
-                log.error("create index error : {} ,tn : {}", e.getMessage(), tn);
-            }
-        }
-    }
-
-    private void calcSummary(JobArgs jobArgs, List<Tuple> collect) throws HoloClientException {
-        for (int i = 0; i < collect.size(); i++) {
-            HoloClient holoClient = HoloUtils.init();
-            String s0 = "";
-            //初始化清空table
-            if (i == 0) {
-                s0 = "truncate table " + jobArgs.getHoloTable();
-            }
-            String tn = collect.get(i).get(0).toString();
-            String s1 = "analyze " + jobArgs.getHoloTable();
-            String s2 = "analyze ng_rt_" + tn;
-            Stream.of(s0, s1, s2, collect.get(i).get(1)).filter(StringUtils::isNotBlank).forEach(x -> HoloUtils.exexSql(holoClient, x));
-            holoClient.close();
-        }
-    }
-
-    private List<Tuple> initSql(List<String> list, JobArgs jobArgs, ArrayList<String> tnList) {
-        return SummaryArgs.SUMMARY_ARGS.entrySet()
-                .stream()
-                .filter(x -> (list.isEmpty() || list.contains(x.getKey())))
-                .map(x -> {
-                    String tn = x.getKey();
-                    SummaryArgs args = x.getValue();
-                    tnList.add(tn);
-                    String p_sql = args.getArgsInfo().stream().map(a -> {
-                        Map<String, String> m = new HashMap<>();
-                        m.put("holoTable", jobArgs.getHoloTable());
-                        m.put("tn", tn);
-                        m.put("keyno", a.getFilterField());
-                        if (StringUtils.isNotBlank(a.getCategory())) {
-                            m.put("category", "_" + a.getCategory());
-                        } else {
-                            m.put("category", "");
-                        }
-                        m.put("groupField", a.getGroupField());
-                        m.put("condition", "in ('0','1')");
-                        return freeMarkUtil.genStr("agg_sample_v1.ftl", m);
-                    }).collect(Collectors.joining("\nUNION ALL\n"));
-                    Map<String, String> m2 = new HashMap<>();
-                    m2.put("table_view", p_sql);
-                    m2.put("tn", tn);
-                    String calc_sql = freeMarkUtil.genStr("agg_sample_v2.ftl", m2);
-                    return new Tuple(tn, calc_sql);
-                }).collect(Collectors.toList());
-    }
-
-    private void tranIndex(JobArgs jobArgs, String tn, String type) throws IOException {
-        String c_index = Stream.of(jobArgs.getTargetIndexPrefix(), type, tn, jobArgs.getTargetIndexSuffix())
-                .filter(StringUtils::isNotBlank).collect(Collectors.joining("_"));
-        //esIndexJobs.deletedIndex(c_index);
-        esIndexJobs.createIndex(c_index);
-    }
-
-    private void switchIndex(String targetIndexPre, String summary_company, String type, String targetIndexSuf, String tn) throws IOException {
-        String index = Stream.of(targetIndexPre, type, tn, targetIndexSuf)
-                .filter(StringUtils::isNotBlank)
-                .collect(Collectors.joining("_"));
-        String alias = Stream.of(targetIndexPre, type, tn)
-                .filter(StringUtils::isNotBlank)
-                .collect(Collectors.joining("_"));
-        List<Alias> addList = Stream.of(
-                Alias.builder().alias(summary_company).index(index).build(),
-                Alias.builder().alias(alias).index(index).build()
-        ).collect(Collectors.toList());
-        List<Alias> removeList = esIndexJobs.getAliases(Stream.of(targetIndexPre, type, tn).filter(StringUtils::isNotBlank).collect(Collectors.joining("_")))
-                .stream().filter(x -> !x.getIndex().contains(targetIndexSuf)).collect(Collectors.toList());
-        esIndexJobs.addRemoveAliases(addList, removeList);
-        //删除多余索引
-        removeList.stream().filter(x -> !x.getIndex().contains(targetIndexSuf))
-                .map(Alias::getIndex).distinct().forEach(i -> {
-            try {
-                esIndexJobs.deletedIndex(i);
-            } catch (Exception e) {
-                e.printStackTrace();
-                log.error("deleted index error {}", i);
-                System.exit(-1);
-            }
-        });
-    }
-}
+//package com.winhc.task.job;
+//
+//import cn.hutool.core.lang.Tuple;
+//import com.alibaba.hologres.client.HoloClient;
+//import com.alibaba.hologres.client.exception.HoloClientException;
+//import com.winhc.task.bean.Alias;
+//import com.winhc.task.bean.JobArgs;
+//import com.winhc.task.common.Constant;
+//import com.winhc.task.common.SummaryArgs;
+//import com.winhc.task.util.FreeMarkUtil;
+//import com.winhc.task.util.HoloUtils;
+//import lombok.extern.slf4j.Slf4j;
+//import org.apache.commons.lang3.StringUtils;
+//import org.springframework.beans.factory.annotation.Autowired;
+//import org.springframework.stereotype.Service;
+//
+//import java.io.IOException;
+//import java.util.*;
+//import java.util.stream.Collectors;
+//import java.util.stream.Stream;
+//
+//@Slf4j
+//@Service
+//public class CalcSummaryJob {
+//    @Autowired
+//    SingleSynHoloJob holoSynJobs;
+//    @Autowired
+//    MultipleSynHoloJob multipleSynHoloJob;
+//    @Autowired
+//    EsIndexJobs esIndexJobs;
+//    @Autowired
+//    FreeMarkUtil freeMarkUtil;
+//
+//    public void start(List<String> list, JobArgs jobArgs, Boolean synData) throws HoloClientException, IOException {
+//
+//        String summary_company = "ng_rt_summary_company";
+//        String summary_person = "ng_rt_summary_person";
+//
+//        ArrayList<String> tnList = new ArrayList<>();
+//        //初始化sql
+//        List<Tuple> collect = initSql(list, jobArgs, tnList);
+//        //计算摘要
+//        if (synData) {
+//            calcSummary(jobArgs, collect);
+//        }
+//        //建索引
+//        createIndex(jobArgs, tnList);
+//        //同步数据
+//        if (synData) {
+//            multipleSynHoloJob.start(tnList, jobArgs);
+//        }
+//        //切换个人,公司索引到生产
+//        //switch_index(jobArgs, summary_company, summary_person, tnList);
+//
+//    }
+//
+//    private void switch_index(JobArgs jobArgs, String summary_company, String summary_person, ArrayList<String> tnList) throws IOException {
+//        for (String tn : tnList) {
+//            if (Constant.PERSON_SUMMARYS.contains(tn)) {
+//                switchIndex(jobArgs.getTargetIndexPrefix(), summary_person, "person", jobArgs.getTargetIndexSuffix(), tn);
+//            }
+//            switchIndex(jobArgs.getTargetIndexPrefix(), summary_company, "", jobArgs.getTargetIndexSuffix(), tn);
+//        }
+//    }
+//
+//    private void createIndex(JobArgs jobArgs, ArrayList<String> tnList) {
+//        for (String tn : tnList) {
+//            try {
+//                log.info("start create index tn : {} ", tn);
+//                if (Constant.PERSON_SUMMARYS.contains(tn)) {
+//                    tranIndex(jobArgs, tn, "person");
+//                }
+//                tranIndex(jobArgs, tn, "");
+//            } catch (Exception e) {
+//                log.error("create index error : {} ,tn : {}", e.getMessage(), tn);
+//            }
+//        }
+//    }
+//
+//    private void calcSummary(JobArgs jobArgs, List<Tuple> collect) throws HoloClientException {
+//        for (int i = 0; i < collect.size(); i++) {
+//            HoloClient holoClient = HoloUtils.init();
+//            String s0 = "";
+//            //初始化清空table
+//            if (i == 0) {
+//                s0 = "truncate table " + jobArgs.getHoloTable();
+//            }
+//            String tn = collect.get(i).get(0).toString();
+//            String s1 = "analyze " + jobArgs.getHoloTable();
+//            String s2 = "analyze ng_rt_" + tn;
+//            Stream.of(s0, s1, s2, collect.get(i).get(1)).filter(StringUtils::isNotBlank).forEach(x -> HoloUtils.exexSql(holoClient, x));
+//            holoClient.close();
+//        }
+//    }
+//
+//    private List<Tuple> initSql(List<String> list, JobArgs jobArgs, ArrayList<String> tnList) {
+//        return SummaryArgs.SUMMARY_ARGS.entrySet()
+//                .stream()
+//                .filter(x -> (list.isEmpty() || list.contains(x.getKey())))
+//                .map(x -> {
+//                    String tn = x.getKey();
+//                    SummaryArgs args = x.getValue();
+//                    tnList.add(tn);
+//                    String p_sql = args.getArgsInfo().stream().map(a -> {
+//                        Map<String, String> m = new HashMap<>();
+//                        m.put("holoTable", jobArgs.getHoloTable());
+//                        m.put("tn", tn);
+//                        m.put("keyno", a.getFilterField());
+//                        if (StringUtils.isNotBlank(a.getCategory())) {
+//                            m.put("category", "_" + a.getCategory());
+//                        } else {
+//                            m.put("category", "");
+//                        }
+//                        m.put("groupField", a.getGroupField());
+//                        m.put("condition", "in ('0','1')");
+//                        return freeMarkUtil.genStr("agg_sample_v1.ftl", m);
+//                    }).collect(Collectors.joining("\nUNION ALL\n"));
+//                    Map<String, String> m2 = new HashMap<>();
+//                    m2.put("table_view", p_sql);
+//                    m2.put("tn", tn);
+//                    String calc_sql = freeMarkUtil.genStr("agg_sample_v2.ftl", m2);
+//                    return new Tuple(tn, calc_sql);
+//                }).collect(Collectors.toList());
+//    }
+//
+//    private void tranIndex(JobArgs jobArgs, String tn, String type) throws IOException {
+//        String c_index = Stream.of(jobArgs.getTargetIndexPrefix(), type, tn, jobArgs.getTargetIndexSuffix())
+//                .filter(StringUtils::isNotBlank).collect(Collectors.joining("_"));
+//        //esIndexJobs.deletedIndex(c_index);
+//        esIndexJobs.createIndex(c_index);
+//    }
+//
+//    private void switchIndex(String targetIndexPre, String summary_company, String type, String targetIndexSuf, String tn) throws IOException {
+//        String index = Stream.of(targetIndexPre, type, tn, targetIndexSuf)
+//                .filter(StringUtils::isNotBlank)
+//                .collect(Collectors.joining("_"));
+//        String alias = Stream.of(targetIndexPre, type, tn)
+//                .filter(StringUtils::isNotBlank)
+//                .collect(Collectors.joining("_"));
+//        List<Alias> addList = Stream.of(
+//                Alias.builder().alias(summary_company).index(index).build(),
+//                Alias.builder().alias(alias).index(index).build()
+//        ).collect(Collectors.toList());
+//        List<Alias> removeList = esIndexJobs.getAliases(Stream.of(targetIndexPre, type, tn).filter(StringUtils::isNotBlank).collect(Collectors.joining("_")))
+//                .stream().filter(x -> !x.getIndex().contains(targetIndexSuf)).collect(Collectors.toList());
+//        esIndexJobs.addRemoveAliases(addList, removeList);
+//        //删除多余索引
+//        removeList.stream().filter(x -> !x.getIndex().contains(targetIndexSuf))
+//                .map(Alias::getIndex).distinct().forEach(i -> {
+//            try {
+//                esIndexJobs.deletedIndex(i);
+//            } catch (Exception e) {
+//                e.printStackTrace();
+//                log.error("deleted index error {}", i);
+//                System.exit(-1);
+//            }
+//        });
+//    }
+//}

+ 0 - 42
src/main/java/com/winhc/task/job/DeleteHbaseByMongoJob.java

@@ -1,42 +0,0 @@
-package com.winhc.task.job;
-
-import com.mongodb.client.MongoDatabase;
-import com.winhc.task.framework.mongo.MongoDbFastScan;
-import com.winhc.task.service.HbaseOperationService;
-import com.winhc.task.dao.SearchDao;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.bson.Document;
-import org.springframework.data.mongodb.core.MongoTemplate;
-import org.springframework.stereotype.Component;
-
-import java.util.List;
-import java.util.function.Consumer;
-import java.util.stream.Collectors;
-
-/**
- * @author: XuJiakai
- * 2020/10/21 09:52
- */
-@Slf4j
-@Component
-@AllArgsConstructor
-public class DeleteHbaseByMongoJob {
-    private final MongoTemplate mongoTemplate;
-    private final HbaseOperationService hbaseOperationService;
-    private final SearchDao searchDao;
-
-    public void start() {
-        MongoDatabase db = mongoTemplate.getDb();
-        Consumer<List<Document>> func = list -> {
-            List<String> cids = list.stream().map(d -> d.getString("id")).collect(Collectors.toList());
-//            System.out.println(cids);
-//            hbaseOperationService.deleteByRowkey("COMPANY_DYNAMIC", cids);
-            searchDao.deleteByIds("winhc-dynamic", "company", cids);
-        };
-
-        MongoDbFastScan mongoDbFastScan = new MongoDbFastScan("xjk_fix_company_dynamic", func, db)
-                .batchSize(200).threadNum(5);
-        mongoDbFastScan.scan();
-    }
-}

+ 0 - 153
src/main/java/com/winhc/task/job/EsIndexJobs.java

@@ -1,153 +0,0 @@
-package com.winhc.task.job;
-
-import com.winhc.task.bean.Alias;
-import com.winhc.task.common.Constant;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import lombok.val;
-import org.apache.http.Header;
-import org.apache.http.entity.ContentType;
-import org.apache.http.nio.entity.NStringEntity;
-import org.apache.http.util.EntityUtils;
-import org.elasticsearch.client.Response;
-import org.elasticsearch.client.RestClient;
-import org.springframework.stereotype.Component;
-
-import java.io.IOException;
-import java.util.*;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-
-@Slf4j
-@Component
-@AllArgsConstructor
-public class EsIndexJobs {
-    private RestClient restClient;
-
-    public void start2() throws IOException {
-
-
-        ArrayList<Alias> list = new ArrayList<>();
-        List<Alias> company = getAliasesV2("ng_rt_summary_company");
-        List<Alias> person = getAliasesV2("ng_rt_summary_person");
-        list.addAll(company);
-        list.addAll(person);
-        List<String> index = list.stream().map(Alias::getIndex).filter(x -> x.contains("v1")).distinct().collect(Collectors.toList());
-        index.forEach(System.out::println);
-        index.forEach(x1 -> {
-            System.out.println(x1);
-            deletedIndex(x1);
-        });
-
-    }
-
-    public void start() throws IOException {
-
-        //List<String> person_index = Arrays.asList("company_change", "company_staff", "company_holder");
-//        List<String> person_index = Arrays.asList("company_dishonest_info", "company_zxr", "company_zxr_restrict", "company_zxr_final_case"
-//                , "zxr_evaluate", "zxr_evaluate_results");
-
-//        List<String> index = Arrays.asList("bankruptcy_open_case", "company_court_open_announcement", "company_court_announcement"
-//                , "company_send_announcement", "company_court_register", "litigation_mediation", "restrictions_on_exit", "auction_tracking");
-
-        List<String> index = Arrays.asList("property_rights_transaction");
-
-        index.forEach(x -> {
-            try {
-                //新建公司摘要索引
-                deletedIndex("ng_rt_summary_" + x + "_v1");
-                createIndex("ng_rt_summary_" + x + "_v1");
-                //加入公司别名
-                addOrRemoveAliases("ng_rt_summary_" + x + "_v1", "ng_rt_summary_company", "add");
-                addOrRemoveAliases("ng_rt_summary_" + x + "_v1", "ng_rt_summary_" + x, "add");
-                if (Constant.PERSON_SUMMARYS.contains(x)) {
-                    //新建个人摘要索引
-                    deletedIndex("ng_rt_summary_person_" + x + "_v1");
-                    createIndex("ng_rt_summary_person_" + x + "_v1");
-                    //加入个人别名
-                    addOrRemoveAliases("ng_rt_summary_person_" + x + "_v1", "ng_rt_summary_person", "add");
-                    addOrRemoveAliases("ng_rt_summary_person_" + x + "_v1", "ng_rt_summary_person_" + x, "add");
-                }
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        });
-
-    }
-
-    public void deletedIndex(String index) {
-        try {
-            action("DELETE", index, null);
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-    }
-
-    public void createIndex(String index) throws IOException {
-        String dsl = "{\n" +
-                "  \"settings\": {\n" +
-                "    \"index\": {\n" +
-                "      \"number_of_shards\": \"1\",\n" +
-                "      \"number_of_replicas\": \"0\"\n" +
-                "    }\n" +
-                "  }\n" +
-                "}";
-
-        action("PUT", index, dsl);
-    }
-
-    public void addOrRemoveAliases(String index, String alias, String action) throws IOException {
-        String dsl = "{\n" +
-                "  \"actions\": [\n" +
-                "    {\n" +
-                "      \"" + action + "\": {\n" +
-                "        \"alias\": \"" + alias + "\",\n" +
-                "        \"index\": \"" + index + "\"\n" +
-                "      }\n" +
-                "    }\n" +
-                "  ]\n" +
-                "}";
-        action("POST", "/_aliases", dsl);
-    }
-
-
-    public List<Alias> getAliases(String aliases) throws IOException {
-        Response response = restClient.performRequest("GET", "_cat/aliases?h=alias,index", new HashMap<>());
-        return Arrays.stream(EntityUtils.toString(response.getEntity()).split("\n")).map(x -> {
-            String[] arr = x.split(" +");
-            return Alias.builder().alias(arr[0]).index(arr[1]).build();
-        }).filter(y -> y.getIndex().contains(aliases)).collect(Collectors.toList());
-    }
-
-    public List<Alias> getAliasesV2(String aliases) throws IOException {
-        Response response = restClient.performRequest("GET", "_cat/aliases?h=alias,index", new HashMap<>());
-        return Arrays.stream(EntityUtils.toString(response.getEntity()).split("\n")).map(x -> {
-            String[] arr = x.split(" +");
-            return Alias.builder().alias(arr[0]).index(arr[1]).build();
-        }).filter(y -> y.getAlias().equalsIgnoreCase(aliases)).collect(Collectors.toList());
-    }
-
-    public Boolean addRemoveAliases(List<Alias> addList, List<Alias> removeList) throws IOException {
-        String body = Stream.of(trans("add", addList), trans("remove", removeList))
-                .flatMap(Collection::stream).collect(Collectors.joining(",", "{\"actions\":[", "]}"));
-        action("POST", "/_aliases", body);
-        return true;
-    }
-
-    public List<String> trans(String type, List<Alias> list) {
-        return list.stream().map(x -> "{ \"" + type + "\": { \"index\": \"" + x.getIndex() + "\" , \"alias\" : \"" + x.getAlias() + "\" }}").collect(Collectors.toList());
-    }
-
-    public String action(String method, String endpoint, String body) throws IOException {
-        log.info("method : {} , endpoint : {} , body : \n{}", method, endpoint, body);
-        Response res;
-        if (body == null) {
-            res = restClient.performRequest(method, endpoint, new HashMap<>());
-        } else {
-            res = restClient.performRequest(method, endpoint, new HashMap<>(), new NStringEntity(body, ContentType.APPLICATION_JSON));
-        }
-        return EntityUtils.toString(res.getEntity());
-    }
-
-}

+ 0 - 70
src/main/java/com/winhc/task/job/EsQueryAggSumJobs.java

@@ -1,70 +0,0 @@
-package com.winhc.task.job;
-
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.aggregations.AggregationBuilders;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-import org.springframework.stereotype.Component;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-
-
-@Slf4j
-@Component
-@AllArgsConstructor
-public class EsQueryAggSumJobs {
-    private RestHighLevelClient restHighLevelClient;
-
-    public void start(String index, String type, List<String> companyIds) throws IOException {
-        query(index, type, companyIds);
-    }
-
-    public void query(String index, String type, List<String> companyIds) throws IOException {
-        long start = System.currentTimeMillis();
-        BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery()
-                .should(QueryBuilders.termsQuery("company_id", companyIds));
-        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
-        searchSourceBuilder.query(queryBuilder);
-        searchSourceBuilder.size(0);
-        aggs(searchSourceBuilder);
-        SearchRequest searchRequest = new SearchRequest()
-                .indices(index)
-                .types(type)
-                .source(searchSourceBuilder);
-        SearchResponse response = restHighLevelClient.search(searchRequest);
-        System.out.println(response.toString());
-        log.info("query size: {} , cost: {}", companyIds.size(), System.currentTimeMillis() - start);
-    }
-
-    public void aggs(SearchSourceBuilder searchSourceBuilder) {
-        List<String> list = Arrays.asList("company_equity_pledge_del_0", "company_equity_pledge_del_1", "company_env_punishment_del_0", "company_env_punishment_del_1",
-                "company_liquidating_info_del_0", "company_liquidating_info_del_1", "company_court_open_announcement_del_0_plaintiff",
-                "company_court_open_announcement_del_1_plaintiff", "company_court_open_announcement_del_0_defendant", "company_court_open_announcement_del_1_defendant",
-                "company_dishonest_info_del_0", "company_dishonest_info_del_1", "company_zxr_restrict_del_0", "company_zxr_restrict_del_1",
-                "company_abnormal_info_del_0", "company_abnormal_info_del_1", "company_public_announcement_del_0", "company_public_announcement_del_1",
-                "company_illegal_info_del_0", "company_illegal_info_del_1", "company_judicial_assistance_del_0_related", "company_judicial_assistance_del_1_related",
-                "company_judicial_assistance_del_0_executed_person", "company_judicial_assistance_del_1_executed_person", "company_equity_info_del_0_related",
-                "company_equity_info_del_1_related", "company_equity_info_del_0_pledgor", "company_equity_info_del_1_pledgor", "company_equity_info_del_0_pledgee", "company_equity_info_del_1_pledgee",
-                "company_court_announcement_del_0_plaintiff", "company_court_announcement_del_1_plaintiff", "company_court_announcement_del_0_litigant", "company_court_announcement_del_1_litigant", "company_send_announcement_del_0_plaintiff",
-                "company_send_announcement_del_1_plaintiff", "company_send_announcement_del_0_defendant", "company_send_announcement_del_1_defendant", "company_zxr_final_case_del_0", "company_zxr_final_case_del_1", "company_court_register_del_0_plaintiff",
-                "company_court_register_del_1_plaintiff", "company_court_register_del_0_defendant", "company_court_register_del_1_defendant", "company_ipr_pledge_del_0_related", "company_ipr_pledge_del_1_related", "company_ipr_pledge_del_0_pledgor",
-                "company_ipr_pledge_del_1_pledgor", "company_ipr_pledge_del_0_pledgee", "company_ipr_pledge_del_1_pledgee", "zxr_evaluate_del_0", "zxr_evaluate_del_1", "zxr_evaluate_results_del_0", "zxr_evaluate_results_del_1", "restrictions_on_exit_del_0", "restrictions_on_exit_del_1",
-                "company_zxr_del_0", "company_zxr_del_1", "company_punishment_info_del_0", "company_punishment_info_del_1", "company_tax_contravention_del_0", "company_tax_contravention_del_1", "company_own_tax_del_0", "company_own_tax_del_1", "company_check_info_del_0", "company_check_info_del_1", "company_punishment_info_creditchina_del_0",
-                "company_punishment_info_creditchina_del_1", "company_mortgage_info_del_0", "company_mortgage_info_del_1", "company_brief_cancel_announcement_del_0", "company_brief_cancel_announcement_del_1", "company_double_random_check_info_del_0", "company_double_random_check_info_del_1", "auction_tracking_del_0", "auction_tracking_del_1",
-                "wenshu_detail_v2_del_0_plaintiff", "wenshu_detail_v2_del_1_plaintiff", "wenshu_detail_v2_del_0_defendant", "wenshu_detail_v2_del_1_defendant",
-                "company_staff_del_0", "company_staff_del_1", "company_holder_del_0", "company_holder_del_1", "company_change_del_0", "company_change_del_1"
-        );
-        list.stream().forEach(x -> {
-            searchSourceBuilder.aggregation(AggregationBuilders.sum(x+"_agg")
-                    .field("sumarry." + x));
-        });
-    }
-
-
-}

+ 0 - 85
src/main/java/com/winhc/task/job/EsQueryListJobs.java

@@ -1,85 +0,0 @@
-package com.winhc.task.job;
-
-import com.winhc.task.framework.es.EsFastScan;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.InnerHitBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-import org.elasticsearch.search.collapse.CollapseBuilder;
-import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
-import org.elasticsearch.search.sort.SortBuilders;
-import org.elasticsearch.search.sort.SortOrder;
-import org.springframework.stereotype.Component;
-import java.io.IOException;
-import java.util.List;
-import java.util.function.Consumer;
-
-
-@Slf4j
-@Component
-@AllArgsConstructor
-public class EsQueryListJobs {
-    private RestHighLevelClient restHighLevelClient;
-
-    public void start(String index, String type) throws IOException {
-//        Consumer<SearchHit[]> func = list -> {
-//            ArrayList<String> l = new ArrayList<>();
-//            Arrays.stream(list).forEach(d -> {
-//                l.add(d.getId());
-//            });
-//            try {
-//                query(index, type, l);
-//            } catch (IOException e) {
-//                e.printStackTrace();
-//            }
-//        };
-//        queryIds(func);
-    }
-
-    public void queryIds(Consumer<SearchHit[]> func) {
-        String dsl = "{\n" +
-                "  \"query\": {\n" +
-                "    \"match_all\": {}\n" +
-                "  }\n" +
-                "}";
-        new EsFastScan(restHighLevelClient, func, "winhc-company-v8_4", "company", dsl, 1000, 1).scan();
-    }
-
-    public void query(String index, String type, List<String> companyIds) throws IOException {
-        long start = System.currentTimeMillis();
-        BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery()
-                .should(QueryBuilders.termsQuery("company_id", companyIds));
-        FetchSourceContext fetchSourceContext = new FetchSourceContext(true, null, null);
-        CollapseBuilder collapseBuilder = new CollapseBuilder("company_id");
-        InnerHitBuilder innerHitBuilder = new InnerHitBuilder().setName("company_agg")
-                .setFrom(0)
-                .setSize(100)
-                .setTrackScores(true)
-                .setIgnoreUnmapped(true)
-                .setFetchSourceContext(fetchSourceContext)
-                .addSort(SortBuilders.fieldSort("company_id")
-                        .order(SortOrder.DESC));
-        collapseBuilder.setInnerHits(innerHitBuilder);
-        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
-                .query(queryBuilder)
-                .from(0)
-                .size(companyIds.size())
-                .sort("company_id", SortOrder.DESC)
-                .collapse(collapseBuilder);
-
-        SearchRequest searchRequest = new SearchRequest()
-                .indices(index).types(type)
-                .source(searchSourceBuilder);
-        SearchResponse response = restHighLevelClient.search(searchRequest);
-        System.out.println(response.toString());
-        log.info("query size: {} , cost: {}", companyIds.size(), System.currentTimeMillis() - start);
-    }
-
-
-}

+ 0 - 104
src/main/java/com/winhc/task/job/EsScanJobSumAgg.java

@@ -1,104 +0,0 @@
-package com.winhc.task.job;
-
-import com.alibaba.fastjson.JSONObject;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import com.winhc.task.common.Constant;
-import com.winhc.task.framework.es.EsFastScan;
-import com.winhc.task.util.ThreadPoolFactory;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.bulk.BulkRequest;
-import org.elasticsearch.action.update.UpdateRequest;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.common.xcontent.XContentType;
-import org.elasticsearch.search.SearchHit;
-import org.springframework.stereotype.Component;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Map;
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.RejectedExecutionException;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-import java.util.function.Consumer;
-
-
-@Slf4j
-@Component
-@AllArgsConstructor
-public class EsScanJobSumAgg {
-    private RestHighLevelClient restHighLevelClient;
-
-    public void start() {
-        ThreadPoolExecutor executorService = ThreadPoolFactory.getThreadPoolExecutor();
-        Arrays.asList("qzu3fuJTQD-OEy1-ppXcRw", "EU6niZMWS6uGhpicyaWi9w", "DGLNkh83SXa-Ry66dvwszA")
-                .forEach(routing -> executorService.submit(() -> synData(routing)));
-//        Arrays.asList("qzu3fuJTQD-OEy1-ppXcRw", "EU6niZMWS6uGhpicyaWi9w", "DGLNkh83SXa-Ry66dvwszA")
-//                .parallelStream().forEach(this::synData);
-    }
-
-    public void synData(String routing) {
-        //ThreadPoolExecutor executorService = ThreadPoolFactory.getThreadPoolExecutor();
-        int poolSize = 20;
-        ArrayBlockingQueue<Runnable> objects = new ArrayBlockingQueue<>(poolSize * 2);
-        ThreadPoolExecutor executorService = new ThreadPoolExecutor(
-                poolSize, poolSize,
-                0L, TimeUnit.MILLISECONDS,
-                objects,
-                new ThreadFactoryBuilder().setNameFormat("ScanEs-pool").build(),
-                (r, executor) -> {
-                    try {
-                        executor.getQueue().put(r);
-                    } catch (InterruptedException e) {
-                        throw new RejectedExecutionException("interrupted", e);
-                    }
-                }
-        );
-
-        String dsl = "{\n" +
-                "  \"query\": {\n" +
-                "    \"match_all\": {}\n" +
-                "  }\n" +
-                "}";
-
-        Consumer<SearchHit[]> func = list -> {
-            BulkRequest bulkRequest = new BulkRequest();
-            bulkRequest.timeout("10m");
-            Arrays.stream(list).forEach(d -> {
-                String id = d.getId();
-                Map<String, Object> sourceAsMap = d.getSourceAsMap();
-                sourceAsMap.forEach((k, v) -> {
-                    Map<String, Integer> m = (Map<String, Integer>) v;
-                    m.forEach((k1, v1) -> {
-                        String index = k1.split("_del_")[0];
-                        if (Constant.indexs.contains(index)) {
-                            UpdateRequest request = new UpdateRequest("ng_rt_summary_" + index, "_doc", id);
-                            request.docAsUpsert(true);
-                            JSONObject j1 = new JSONObject();
-                            j1.put(k1, v1);
-                            JSONObject j2 = new JSONObject();
-                            j2.put("summary", j1);
-                            //j2.put("company_id", id);
-                            request.doc(JSONObject.toJSONString(j2), XContentType.JSON);
-                            request.retryOnConflict(6);
-                            bulkRequest.add(request);
-                        }
-                    });
-
-                });
-            });
-            executorService.submit(() -> {
-                try {
-                    restHighLevelClient.bulk(bulkRequest);
-                } catch (IOException e) {
-                    e.printStackTrace();
-                }
-            });
-
-        };
-
-        new EsFastScan(restHighLevelClient, func, "company_summary", "_doc", dsl, routing).scan();
-    }
-
-
-}

+ 0 - 115
src/main/java/com/winhc/task/job/EsScanJobSumAggPlus.java

@@ -1,115 +0,0 @@
-package com.winhc.task.job;
-
-import com.alibaba.fastjson.JSONObject;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import com.winhc.task.common.Constant;
-import com.winhc.task.framework.es.EsFastScan;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.action.bulk.BulkRequest;
-import org.elasticsearch.action.update.UpdateRequest;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.common.xcontent.XContentType;
-import org.elasticsearch.search.SearchHit;
-import org.springframework.stereotype.Component;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Map;
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.RejectedExecutionException;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-import java.util.function.Consumer;
-
-
-@Slf4j
-@Component
-@AllArgsConstructor
-public class EsScanJobSumAggPlus {
-    private RestHighLevelClient restHighLevelClient;
-
-    public void start() {
-
-        Arrays.asList("x-CxhuRDT-6fNo9SBkzEFw", "kVrnS8W5RqilOiT_ONjZxg", "XK8AQhV3Ry2y72KEnEzNHA")
-                .forEach(routing -> {
-                    new Thread(() -> {
-                        synData(routing);
-                    }).start();
-                });
-//        Arrays.asList("qzu3fuJTQD-OEy1-ppXcRw", "EU6niZMWS6uGhpicyaWi9w", "DGLNkh83SXa-Ry66dvwszA")
-//                .forEach(routing -> executorService.submit(() -> synData(routing)));
-//        Arrays.asList("qzu3fuJTQD-OEy1-ppXcRw", "EU6niZMWS6uGhpicyaWi9w", "DGLNkh83SXa-Ry66dvwszA")
-//                .parallelStream().forEach(this::synData);
-    }
-
-    public void synData(String routing) {
-        //ThreadPoolExecutor executorService = ThreadPoolFactory.getThreadPoolExecutor();
-        int poolSize = 20;
-        ArrayBlockingQueue<Runnable> objects = new ArrayBlockingQueue<>(poolSize * 2);
-        ThreadPoolExecutor executorService = new ThreadPoolExecutor(
-                poolSize, poolSize,
-                0L, TimeUnit.MILLISECONDS,
-                objects,
-                new ThreadFactoryBuilder().setNameFormat("ScanEs-pool").build(),
-                (r, executor) -> {
-                    try {
-                        executor.getQueue().put(r);
-                    } catch (InterruptedException e) {
-                        throw new RejectedExecutionException("interrupted", e);
-                    }
-                }
-        );
-
-        String dsl = "{\n" +
-                "  \"query\": {\n" +
-                "    \"match_all\": {}\n" +
-                "  }\n" +
-                "}";
-
-        Consumer<SearchHit[]> func = list -> {
-            BulkRequest bulkRequest = new BulkRequest();
-            bulkRequest.timeout("10m");
-            Arrays.stream(list).forEach(d -> {
-                //String id = d.getId();
-                Map<String, Object> sourceAsMap = d.getSourceAsMap();
-                //System.out.println(sourceAsMap.toString());
-                String sum_type = sourceAsMap.get("sum_type").toString();
-                String tn = sourceAsMap.get("tn").toString();
-                String id = sourceAsMap.get("id").toString();
-                Map<String, Integer> m = (Map<String, Integer>) sourceAsMap.get("summary");
-                m.forEach((k1, v1) -> {
-                    String index = k1.split("_del_")[0];
-                    if (Constant.indexs.contains(index)) {
-                        String pre = "";
-                        if ("person".equalsIgnoreCase(sum_type)) {
-                            pre = "person_";
-                        }
-                        UpdateRequest request = new UpdateRequest("ng_rt_summary_" + pre + index, "_doc", id);
-                        request.docAsUpsert(true);
-                        JSONObject j1 = new JSONObject();
-                        j1.put(k1, v1);
-                        JSONObject j2 = new JSONObject();
-                        j2.put("summary", j1);
-                        //j2.put("company_id", id);
-                        request.doc(JSONObject.toJSONString(j2), XContentType.JSON);
-                        request.retryOnConflict(6);
-                        bulkRequest.add(request);
-                    }
-                });
-
-            });
-            executorService.submit(() -> {
-                try {
-                    restHighLevelClient.bulk(bulkRequest);
-                } catch (IOException e) {
-                    e.printStackTrace();
-                }
-            });
-
-        };
-
-        new EsFastScan(restHighLevelClient, func, "test_tmp_xf_sum_v9", "_doc", dsl, routing).scan();
-    }
-
-
-}

+ 149 - 0
src/main/java/com/winhc/task/run/CleanDataTask.java

@@ -0,0 +1,149 @@
+package com.winhc.task.run;
+
+import com.mongodb.client.FindIterable;
+import com.mongodb.client.MongoCollection;
+import com.mongodb.client.model.Filters;
+import com.winhc.task.util.BaseUtils;
+import com.winhc.task.util.ESUtils;
+import lombok.AllArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.bson.Document;
+import org.bson.conversions.Bson;
+import org.frameworkset.elasticsearch.boot.BBossESStarter;
+import org.frameworkset.elasticsearch.client.ClientInterface;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.scheduling.annotation.EnableScheduling;
+import org.springframework.scheduling.annotation.Scheduled;
+import org.springframework.stereotype.Component;
+
+import javax.annotation.PostConstruct;
+import java.util.ArrayList;
+import java.util.List;
+
+import static com.mongodb.client.model.Filters.in;
+
+/**
+ * Scheduled clean-up task.
+ * <p>
+ * Each run reads the ids stored in two MongoDB collections and forwards them, batch by batch,
+ * to Elasticsearch: judgment documents ("wenshu") are logically deleted through an
+ * update-by-query on the client named {@code es5}, judicial cases are physically deleted
+ * through a delete-by-query on the client named {@code es6}.
+ * <p>
+ * The bean is only registered when the property {@code scheduling.enabled} equals {@code true}.
+ *
+ * @author π
+ * @date 2021/6/22 17:07
+ */
+
+@Component
+@Slf4j
+@EnableScheduling
+@AllArgsConstructor
+@ConditionalOnProperty(prefix = "scheduling", name = "enabled", havingValue = "true")
+public class CleanDataTask {
+    /** Pause after every submitted batch, in milliseconds. */
+    private static final long BATCH_PAUSE_MILLIS = 1000 * 2;
+    /** Query-string options shared by every by-query request issued from this task. */
+    private static final String BY_QUERY_OPTIONS = "?conflicts=proceed&wait_for_completion=false&refresh=true";
+
+    // NOTE(review): @AllArgsConstructor generates a constructor that also takes the two
+    // ClientInterface fields below, although they are assigned in init(). Confirm that Spring
+    // can resolve those constructor arguments in this application, or narrow the constructor.
+    @Autowired
+    @Qualifier("bbossESStarterEs5")
+    private BBossESStarter bbossESStarterEs5;
+    @Autowired
+    @Qualifier("bbossESStarterEs6")
+    private BBossESStarter bbossESStarterEs6;
+
+    private ClientInterface restClient5;
+    private ClientInterface restClient6;
+
+    private final MongoTemplate mongoTemplate;
+
+
+    /**
+     * Resolves the two Elasticsearch clients once the starters have been injected.
+     */
+    @PostConstruct
+    public void init() {
+        restClient5 = bbossESStarterEs5.getRestClient("es5");
+        // The "es6" client is taken from its own starter (the original asked the ES5 starter for it).
+        restClient6 = bbossESStarterEs6.getRestClient("es6");
+    }
+
+    /**
+     * Entry point of the job; the cron expression fires it every 30 seconds.
+     *
+     * @throws InterruptedException if the thread is interrupted during the pause between batches
+     */
+    @Scheduled(cron = "*/30 * * * * ?")
+    // Alternative schedule kept from the original: @Scheduled(cron = "0 /2 * * * ? ")
+    public void start() throws InterruptedException {
+        log.info("startJob CleanDataTask !!! ");
+
+        String wenshu_mongo = "wenshu_remove_user";
+        String case_mongo = "sifa_remove_user";
+        int batchSize = 200;
+
+        // Index names that were commented out in favour of the test indices below
+        // (presumably the real targets; verify before switching):
+        //   wenshu_index               = "wenshu_detail2"
+        //   judicial_case_index        = "winhc_judicial_case_v2"
+        //   judicial_case_detail_index = "winhc_judicial_case_detail_v2"
+        String wenshu_index = "test_wenshu";
+        String judicial_case_index = "test_case1";
+        String judicial_case_detail_index = "test_case2";
+
+
+        // Read ids from mongo, then logically delete the matching judgment documents in ES.
+        step1(wenshu_mongo, wenshu_index, batchSize);
+        // Read case numbers from mongo, then delete the matching judicial cases in ES.
+        step2(case_mongo, judicial_case_index, judicial_case_detail_index, batchSize);
+
+        log.info("stopJob CleanDataTask !!! ");
+
+    }
+
+    /**
+     * Streams the case collection and deletes the referenced judicial cases in batches.
+     *
+     * @param case_mongo                 name of the MongoDB collection whose {@code _id} holds the case number
+     * @param judicial_case_index        first ES index to delete from
+     * @param judicial_case_detail_index second ES index to delete from
+     * @param batchSize                  Mongo fetch size and number of case numbers per ES request
+     * @throws InterruptedException if the thread is interrupted during the pause between batches
+     */
+    private void step2(String case_mongo, String judicial_case_index, String judicial_case_detail_index, int batchSize) throws InterruptedException {
+        ArrayList<String> caseList = new ArrayList<>();
+        // The cursor is opened with noCursorTimeout(true), so it must be closed explicitly:
+        // if a batch fails half-way the server would otherwise keep it open.
+        try (com.mongodb.client.MongoCursor<Document> cursor = mongoTemplate.getCollection(case_mongo)
+                .find().noCursorTimeout(true).batchSize(batchSize).iterator()) {
+            while (cursor.hasNext()) {
+                String case_no = BaseUtils.case_no_trim(cursor.next().getString("_id"));
+                if (StringUtils.isNotBlank(case_no)) {
+                    caseList.add(case_no);
+                }
+                if (caseList.size() >= batchSize) {
+                    caseDeleted(judicial_case_index, judicial_case_detail_index, caseList);
+                }
+            }
+        }
+        // Flush whatever is left over after the last full batch.
+        if (!caseList.isEmpty()) {
+            caseDeleted(judicial_case_index, judicial_case_detail_index, caseList);
+        }
+    }
+
+    /**
+     * Streams the judgment-document collection and flags the referenced documents in batches.
+     *
+     * @param wenshu_mongo name of the MongoDB collection whose {@code _id} holds the document id
+     * @param wenshu_index ES index to update
+     * @param batchSize    Mongo fetch size and number of ids per ES request
+     * @throws InterruptedException if the thread is interrupted during the pause between batches
+     */
+    private void step1(String wenshu_mongo, String wenshu_index, int batchSize) throws InterruptedException {
+        ArrayList<String> idList = new ArrayList<>();
+        // Same reasoning as in step2: a no-timeout cursor has to be closed on every exit path.
+        try (com.mongodb.client.MongoCursor<Document> cursor = mongoTemplate.getCollection(wenshu_mongo)
+                .find().noCursorTimeout(true).batchSize(batchSize).iterator()) {
+            while (cursor.hasNext()) {
+                String id = cursor.next().getString("_id");
+                if (StringUtils.isNotBlank(id)) {
+                    idList.add(id);
+                }
+                if (idList.size() >= batchSize) {
+                    wenshuUpdate(wenshu_index, idList);
+                }
+            }
+        }
+        // Flush whatever is left over after the last full batch.
+        if (!idList.isEmpty()) {
+            wenshuUpdate(wenshu_index, idList);
+        }
+    }
+
+    /**
+     * Physically deletes judicial cases from both case indices, then empties the batch and pauses.
+     * <p>
+     * The requests carry {@code wait_for_completion=false}, so the logged responses are whatever
+     * Elasticsearch answers on submission, not the final outcome of the deletion.
+     *
+     * @param judicial_case_index        first ES index to delete from
+     * @param judicial_case_detail_index second ES index to delete from
+     * @param caseList                   case numbers of the current batch; cleared before returning
+     * @throws InterruptedException if the thread is interrupted during the pause
+     */
+    private void caseDeleted(String judicial_case_index, String judicial_case_detail_index, ArrayList<String> caseList) throws InterruptedException {
+        String res2 = restClient6.deleteByQuery(judicial_case_index + "/_delete_by_query" + BY_QUERY_OPTIONS, ESUtils.deletedByQuery(caseList));
+        String res3 = restClient6.deleteByQuery(judicial_case_detail_index + "/_delete_by_query" + BY_QUERY_OPTIONS, ESUtils.deletedByQuery(caseList));
+        log.info("caseDeleted1 res : {}\n", res2);
+        log.info("caseDeleted2 res : {}\n", res3);
+        caseList.clear();
+        Thread.sleep(BATCH_PAUSE_MILLIS);
+    }
+
+    /**
+     * Logically deletes judgment documents through an update-by-query, then empties the batch and pauses.
+     *
+     * @param wenshu_index ES index to update
+     * @param idList       document ids of the current batch; cleared before returning
+     * @throws InterruptedException if the thread is interrupted during the pause
+     */
+    private void wenshuUpdate(String wenshu_index, ArrayList<String> idList) throws InterruptedException {
+        String res1 = restClient5.updateByQuery(wenshu_index + "/_update_by_query" + BY_QUERY_OPTIONS, ESUtils.updateWenshuById(idList));
+        log.info("wenshuUpdate res : {}\n", res1);
+        idList.clear();
+        Thread.sleep(BATCH_PAUSE_MILLIS);
+    }
+}

+ 0 - 53
src/main/java/com/winhc/task/service/impl/InnerSearchCompanyServiceImpl.java

@@ -1,53 +0,0 @@
-package com.winhc.task.service.impl;
-
-import com.winhc.task.service.InnerSearchCompanyService;
-import com.winhc.task.util.CompanyNameUtils;
-import com.winhc.task.dao.SearchDao;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.MultiMatchQueryBuilder;
-import org.springframework.stereotype.Service;
-
-import static org.elasticsearch.index.query.QueryBuilders.*;
-
-/**
- * @author: XuJiakai
- * 2021/4/23 19:01
- */
-@Slf4j
-@Service
-@AllArgsConstructor
-public class InnerSearchCompanyServiceImpl implements InnerSearchCompanyService {
-    private final SearchDao searchDao;
-
-    public static final String index = "winhc-company-v8";
-    public static final String type = "company";
-
-
-    @Override
-    public Object searchCompanyName(String companyName) {
-        return searchDao.search(index, type, getBoolQuery(companyName), 0, 5);
-    }
-
-    private BoolQueryBuilder getBoolQuery(String companyName) {
-        BoolQueryBuilder boolQuery = boolQuery();
-        String content = CompanyNameUtils.cleanup(companyName);
-
-
-        boolQuery.should(termQuery("cname.value.keyword", content).boost(1000));
-        boolQuery.should(termQuery("history_name.value.keyword", content).boost(1000));
-        boolQuery.should(multiMatchQuery(companyName)
-                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                .minimumShouldMatch("5<85%")
-                .tieBreaker(0.3F)
-                .field("cname.value", 16)
-                .field("history_name.value", 12)
-                .field("cname.value.pinyin", 6)
-                .field("history_name.value.pinyin", 6)
-
-        );
-
-        return boolQuery;
-    }
-}

+ 0 - 123
src/main/java/com/winhc/task/service/impl/SearchV7ServiceImpl.java

@@ -1,123 +0,0 @@
-package com.winhc.task.service.impl;
-
-import com.winhc.task.service.SearchService;
-import com.winhc.task.dao.impl.SearchDaoImpl;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.common.lucene.search.function.CombineFunction;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.MultiMatchQueryBuilder;
-import org.elasticsearch.index.query.Operator;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
-import org.elasticsearch.index.query.functionscore.ScriptScoreFunctionBuilder;
-import org.elasticsearch.script.Script;
-import org.elasticsearch.script.ScriptType;
-import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
-import org.elasticsearch.search.sort.ScriptSortBuilder;
-import org.elasticsearch.search.sort.SortOrder;
-import org.springframework.stereotype.Service;
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.elasticsearch.index.query.QueryBuilders.*;
-
-/**
- * @author: XuJiakai
- * 2020/12/7 10:58
- */
-@Slf4j
-@Service(SearchV7ServiceImpl.index)
-@AllArgsConstructor
-public class SearchV7ServiceImpl implements SearchService {
-
-
-    private SearchDaoImpl searchDao;
-    private static final ScriptSortBuilder scriptSortBuilder = fastDefaultSort();
-
-    public static final String index = "winhc-company-v7";
-    public static final String type = "company";
-
-    private static final String[] includes = new String[]{"cname", "estiblish_time", "reg_status", "company_type", "province_code", "reg_capital", "logo"};
-    private static final FetchSourceContext fetchSourceContext = new FetchSourceContext(true, includes, null);
-
-
-    private static final String[] includes_tips = new String[]{"cname.show"};
-    private static final FetchSourceContext fetchSourceContext_tips = new FetchSourceContext(true, includes_tips, null);
-
-    private BoolQueryBuilder getBoolQuery(String content) {
-        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-        MultiMatchQueryBuilder multiMatchQueryBuilder = multiMatchQuery(content)
-                .operator(Operator.AND)
-                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                .minimumShouldMatch("5<80%")
-                .tieBreaker(0.3F);
-
-        boolQuery.should(termQuery("cname.value.keyword", content).boost(100));
-        boolQuery.should(termQuery("history_name.value.keyword", content).boost(100));
-
-        multiMatchQueryBuilder
-                .field("cname.value", 16)
-                .field("history_name.value", 12)
-        ;
-
-
-        boolQuery.should(multiMatchQueryBuilder);
-        BoolQueryBuilder boolQuery2 = QueryBuilders.boolQuery();
-        boolQuery2.must(boolQuery);
-        boolQuery2.mustNot(QueryBuilders.existsQuery("current_id"));
-
-        return boolQuery2;
-
-    }
-
-
-    private static ScriptSortBuilder fastDefaultSort() {
-        Map<String, Object> params = new HashMap<>();
-        String script_inline =
-                "if(doc['cname.value.keyword'].value==null||doc['cname.value.keyword'].value.length()==3){" +
-                        "return 0.01;" +
-                        "}" +
-                        "if(doc['cname.value.keyword'].value.length()<=3){" +
-                        "return 0.3;" +
-                        "}" +
-                        "if(doc['reg_status'].value==null || doc['reg_status'].value.contains('销')){" +
-                        "return 1;" +
-                        "}" +
-                        "double a = doc['reg_capital_amount']==null?0.0:doc['reg_capital_amount'].value>1000000000000.0?1000000000000.0:doc['reg_capital_amount'].value;" +
-                        "double w = Math.log(a/10000000+1)+1;" +
-                        "if(doc['company_type'].value=='1'){" +
-                        "w=w+3;" +
-                        "}" +
-                        "return w;";
-
-        Script script = new Script(ScriptType.INLINE, "painless", script_inline, params);
-        return new ScriptSortBuilder(script, ScriptSortBuilder.ScriptSortType.NUMBER).order(SortOrder.DESC);
-
-    }
-
-    @Override
-    public Object tips(String s) {
-        BoolQueryBuilder boolQuery = getBoolQuery(s);
-        FunctionScoreQueryBuilder function = functionScoreQuery(boolQuery, new ScriptScoreFunctionBuilder(scriptSortBuilder.script())
-        )
-                .boostMode(CombineFunction.MULTIPLY);
-
-        return searchDao.search(index, type, function, null, fetchSourceContext_tips, 0, 5);
-    }
-
-    @Override
-    public Object controlGroup(String s) {
-        return searchDao.search(index, type, getBoolQuery(s), null, fetchSourceContext, 0, 10);
-    }
-
-    @Override
-    public Object query(String s, int from, int size) {
-        BoolQueryBuilder boolQuery = getBoolQuery(s);
-        FunctionScoreQueryBuilder function = functionScoreQuery(boolQuery, new ScriptScoreFunctionBuilder(scriptSortBuilder.script())
-        )
-                .boostMode(CombineFunction.MULTIPLY);
-
-        return searchDao.search(index, type, function, null, fetchSourceContext, from, size);
-    }
-}

+ 0 - 232
src/main/java/com/winhc/task/service/impl/SearchV8FastServiceImpl.java

@@ -1,232 +0,0 @@
-package com.winhc.task.service.impl;
-
-import com.winhc.task.service.SearchService;
-import com.winhc.task.dao.SearchDao;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.index.query.*;
-import org.elasticsearch.index.query.functionscore.ScriptScoreFunctionBuilder;
-import org.elasticsearch.script.Script;
-import org.elasticsearch.script.ScriptType;
-import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
-import org.elasticsearch.search.rescore.QueryRescoreMode;
-import org.elasticsearch.search.rescore.QueryRescorerBuilder;
-import org.elasticsearch.search.sort.FieldSortBuilder;
-import org.elasticsearch.search.sort.ScriptSortBuilder;
-import org.elasticsearch.search.sort.SortBuilders;
-import org.elasticsearch.search.sort.SortOrder;
-import org.springframework.context.annotation.Primary;
-import org.springframework.stereotype.Service;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.regex.Pattern;
-
-import static org.elasticsearch.index.query.QueryBuilders.*;
-
-/**
- * @author: XuJiakai
- * 2020/11/19 14:54
- */
-@Slf4j
-@Primary
-@Service(value = "v8_fast")
-@AllArgsConstructor
-public class SearchV8FastServiceImpl implements SearchService {
-    private SearchDao searchDao;
-
-//        public static final String index = "winhc-company-v8";
-    public static final String index = "winhc-company-v8_3";
-    public static final String type = "company";
-    private static final String[] includes = new String[]{"cname", "legal_entity*", "estiblish_time", "reg_status_std", "company_type", "province_code", "reg_capital", "logo", "new_cid"};
-    private static final FetchSourceContext fetchSourceContext = new FetchSourceContext(true, includes, null);
-
-    private static final String[] includes_tips = new String[]{"cname.show"};
-    private static final FetchSourceContext fetchSourceContext_tips = new FetchSourceContext(true, includes_tips, null);
-
-
-    @Override
-    public Object tips(String content) {
-        BoolQueryBuilder queryBuilder = boolQuery()
-                .should(matchPhrasePrefixQuery("cname.value", content))
-//                .should(matchPhraseQuery("app_info", content))
-                ;
-        FieldSortBuilder company_score_weight = SortBuilders.fieldSort("company_score_weight").order(SortOrder.DESC);
-
-
-        Object search = searchDao.search(index, type, queryBuilder, company_score_weight, null, 0, 5);
-        return search;
-    }
-
-    @Override
-    public Object controlGroup(String s) {
-        QueryBuilder boolQuery = getBoolQuery(s);
-        return searchDao.search(index, type, boolQuery, null, fetchSourceContext, 0, 10);
-    }
-
-    @Override
-    public Object query(String content, int from, int size) {
-        BoolQueryBuilder boolQuery = getBoolQuery(content);
-        Map<String, Object> map = new HashMap<String, Object>() {{
-            put("query_content", content);
-        }};
-        ScriptSortBuilder scriptSortBuilder = new ScriptSortBuilder(new Script(ScriptType.STORED, null, "company-search-script", map), ScriptSortBuilder.ScriptSortType.NUMBER).order(SortOrder.DESC);
-
-
-        QueryRescorerBuilder rescorerBuilder = new QueryRescorerBuilder(functionScoreQuery(new ScriptScoreFunctionBuilder(scriptSortBuilder.script())))
-                .windowSize(50)
-                .setScoreMode(QueryRescoreMode.Multiply);
-
-        Object search = searchDao.search(index, type, boolQuery, rescorerBuilder, null, fetchSourceContext_tips, from, size);
-        return search;
-    }
-
-
-    private BoolQueryBuilder getBoolQuery(String content) {
-        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
-        boolQuery.should(termQuery("cname.value.keyword", content).boost(1000));
-        boolQuery.should(termQuery("history_name.value.keyword", content).boost(1000));
-
-        boolQuery.should(disMaxQuery()
-                        .add(disMaxQuery()
-                                .add(termQuery("legal_entity_name.keyword", content).boost(10))
-                                .add(termQuery("holder.name.keyword", content).boost(5.5F))
-                                .add(termQuery("staff.name.keyword", content).boost(5.5F))
-                                .tieBreaker(0.3F)
-                        ).add(disMaxQuery()
-
-                                .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<95%"))
-//                        .add(matchQuery("holder", content).boost(10).minimumShouldMatch("5<80%"))
-//                        .add(matchQuery("staff", content).boost(6).minimumShouldMatch("5<80%"))
-
-//                        .add(matchPhraseQuery("legal_entity_name", content).boost(6).slop(3))
-                                .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
-                                .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
-
-                                .tieBreaker(0.3F)
-                        ).tieBreaker(0.3F)
-        );
-        boolQuery.should(disMaxQuery()
-                        .add(disMaxQuery()
-                                .add(termQuery("icp.keyword", content).boost(20))
-                                .add(termQuery("app_info.keyword", content).boost(40))
-                                .add(termQuery("company_tm.keyword", content).boost(20))
-                                .tieBreaker(0.4F))
-                        .add(disMaxQuery()
-//                        .add(matchQuery("icp", content).boost(8).minimumShouldMatch("5"))
-//                        .add(matchQuery("app_info", content).boost(19).minimumShouldMatch("5"))
-//                        .add(matchQuery("company_tm", content).boost(7).minimumShouldMatch("5"))
-
-
-                                        .add(matchPhraseQuery("icp", content).boost(8).slop(3))
-                                        .add(matchPhraseQuery("app_info", content).boost(19).slop(3))
-                                        .add(matchPhraseQuery("company_tm", content).boost(7).slop(3))
-
-                                        .tieBreaker(0.3F)
-                        ).tieBreaker(0.4F)
-        );
-
-
-        boolQuery.should(
-                disMaxQuery()
-                        .add(multiMatchQuery(content)
-                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                                .minimumShouldMatch("5<90%")
-                                .tieBreaker(0.3F)
-
-                                .field("cname.value", 16)
-                                .field("history_name.value", 12))
-                        .add(multiMatchQuery(content)
-                                .operator(Operator.AND)
-                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                                .tieBreaker(0.3F)
-                                .field("cname.value.standard", 16)
-                                .field("history_name.value.standard", 12))
-                        .tieBreaker(0.4F)
-        );
-
-
-        BoolQueryBuilder boolQuery2 = QueryBuilders.boolQuery()
-                .filter(termQuery("deleted", "0"))
-                .filter(rangeQuery("company_score_weight").gt(0.3F))
-                .must(boolQuery);
-        return boolQuery2;
-    }
-
-
-    private BoolQueryBuilder getPersonQuery(String content) {
-        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
-        boolQuery.should(termQuery("cname.value.keyword", content).boost(1000));
-        boolQuery.should(termQuery("history_name.value.keyword", content).boost(1000));
-
-        boolQuery.should(disMaxQuery()
-                .add(disMaxQuery()
-                        .add(termQuery("legal_entity_name.keyword", content).boost(10))
-                        .add(termQuery("holder.name.keyword", content).boost(5.5F))
-                        .add(termQuery("staff.name.keyword", content).boost(5.5F))
-                        .tieBreaker(0.3F)
-                ).add(disMaxQuery()
-                        .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<95%"))
-                        .add(matchPhraseQuery("holder.name", content).boost(10).slop(3))
-                        .add(matchPhraseQuery("staff.name", content).boost(6).slop(3))
-                        .tieBreaker(0.3F)
-                )
-                .boost(2f)
-                .tieBreaker(0.3F)
-        );
-
-
-        boolQuery.should(disMaxQuery()
-                .add(disMaxQuery()
-                        .add(termQuery("icp.keyword", content).boost(20))
-                        .add(termQuery("app_info.keyword", content).boost(40))
-                        .add(termQuery("company_tm.keyword", content).boost(20))
-                        .tieBreaker(0.4F))
-                .add(disMaxQuery()
-                        .add(matchPhraseQuery("icp", content).boost(8).slop(3))
-                        .add(matchPhraseQuery("app_info", content).boost(19).slop(3))
-                        .add(matchPhraseQuery("company_tm", content).boost(7).slop(3))
-
-                        .tieBreaker(0.3F)
-                )
-                .boost(0.5f)
-                .tieBreaker(0.1F)
-        );
-
-
-        boolQuery.should(
-                disMaxQuery()
-                        .add(multiMatchQuery(content)
-                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                                .minimumShouldMatch("5<90%")
-                                .tieBreaker(0.3F)
-
-                                .field("cname.value", 16)
-                                .field("history_name.value", 12))
-                        .add(multiMatchQuery(content)
-                                .operator(Operator.AND)
-                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                                .tieBreaker(0.3F)
-                                .field("cname.value.standard", 16)
-                                .field("history_name.value.standard", 12))
-                        .boost(0.5f)
-                        .tieBreaker(0.1F)
-        );
-
-
-        BoolQueryBuilder boolQuery2 = QueryBuilders.boolQuery()
-                .filter(termQuery("deleted", "0"))
-                .filter(rangeQuery("company_score_weight").gt(0.3F))
-                .must(boolQuery);
-        return boolQuery2;
-    }
-
-
-    private static final Pattern pattern = Pattern.compile("^[a-zA-Z ]*$");
-
-    private static boolean is_pinyin(String str) {
-        return pattern.matcher(str).find();
-    }
-
-}

+ 0 - 255
src/main/java/com/winhc/task/service/impl/SearchV8ServiceImpl.java

@@ -1,255 +0,0 @@
-package com.winhc.task.service.impl;
-
-import com.winhc.task.service.SearchService;
-import com.winhc.task.util.SortUtil;
-import com.winhc.task.dao.SearchDao;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.common.lucene.search.function.CombineFunction;
-import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
-import org.elasticsearch.index.query.*;
-import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
-import org.elasticsearch.index.query.functionscore.ScriptScoreFunctionBuilder;
-import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
-import org.elasticsearch.search.sort.ScriptSortBuilder;
-import org.springframework.context.annotation.Primary;
-import org.springframework.stereotype.Service;
-
-import java.util.regex.Pattern;
-
-import static org.elasticsearch.index.query.QueryBuilders.*;
-
-/**
- * @author: XuJiakai
- * 2020/11/19 14:54
- */
-@Slf4j
-@Primary
-@Service(value = SearchV8ServiceImpl.index)
-@AllArgsConstructor
-public class SearchV8ServiceImpl implements SearchService {
-    private SearchDao searchDao;
-
-    public static final String index = "winhc-company-v8";
-    public static final String type = "company";
-    private static final String[] includes = new String[]{"cname", "legal_entity*", "estiblish_time", "reg_status_std", "company_type", "province_code", "reg_capital", "logo", "new_cid"};
-    private static final FetchSourceContext fetchSourceContext = new FetchSourceContext(true, includes, null);
-
-    private static final String[] includes_tips = new String[]{"cname.show"};
-    private static final FetchSourceContext fetchSourceContext_tips = new FetchSourceContext(true, includes_tips, null);
-
-
-    @Override
-    public Object tips(String content) {
-        BoolQueryBuilder boolQuery = getBoolQuery(content);
-        ScriptSortBuilder scriptSortBuilder = SortUtil.getInstance().fastSort;
-        FunctionScoreQueryBuilder function = functionScoreQuery(boolQuery, new ScriptScoreFunctionBuilder(scriptSortBuilder.script())
-        )
-                .scoreMode(FiltersFunctionScoreQuery.ScoreMode.SUM)
-                .boostMode(CombineFunction.MULTIPLY);
-
-        Object search = searchDao.search(index, type, function, null, fetchSourceContext_tips, 0, 5);
-        return search;
-    }
-
-    @Override
-    public Object controlGroup(String s) {
-        QueryBuilder boolQuery = getBoolQuery(s);
-        return searchDao.search(index, type, boolQuery, null, fetchSourceContext, 0, 10);
-    }
-
-    @Override
-    public Object query(String content, int from, int size) {
-        BoolQueryBuilder boolQuery = getBoolQuery(content);
-        ScriptSortBuilder scriptSortBuilder = SortUtil.getInstance().fastSort;
-        FunctionScoreQueryBuilder function = functionScoreQuery(boolQuery, new ScriptScoreFunctionBuilder(scriptSortBuilder.script())
-        )
-                .boostMode(CombineFunction.MULTIPLY);
-
-        Object search = searchDao.search(index, type, function, null, fetchSourceContext, from, size);
-        return search;
-    }
-
-    private BoolQueryBuilder getBoolQuery(String content) {
-        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
-
-//        boolQuery.should(matchPhrasePrefixQuery("cname.value.pinyin", content).analyzer("ik_pinyin_analyzer_search").maxExpansions(5).boost(0.1F));
-
-
-        boolQuery.should(termQuery("cname.value.keyword", content).boost(1000));
-        boolQuery.should(termQuery("history_name.value.keyword", content).boost(1000));
-
-        /*boolQuery.should(disMaxQuery().add(
-                QueryBuilders.boolQuery()
-                        .should(termQuery("legal_entity_name.keyword", content).boost(10))
-                        .should(termQuery("holder.name.keyword", content).boost(5.5F))
-                        .should(termQuery("staff.name.keyword", content).boost(5.5F))
-        ).tieBreaker(0.3F));*/
-
-
-        boolQuery.should(disMaxQuery()
-                .add(disMaxQuery()
-                        .add(termQuery("legal_entity_name.keyword", content).boost(10))
-                        .add(termQuery("holder.name.keyword", content).boost(5.5F))
-                        .add(termQuery("staff.name.keyword", content).boost(5.5F))
-                        .tieBreaker(0.3F))
-                .add(disMaxQuery()
-                        .add(matchQuery("legal_entity_name", content).boost(6).minimumShouldMatch("5<80%"))
-                        .add(matchQuery("holder", content).boost(10).minimumShouldMatch("5<80%"))
-                        .add(matchQuery("staff", content).boost(6).minimumShouldMatch("5<80%"))
-                        .tieBreaker(0.3F)
-                ).tieBreaker(0.3F)
-        );
-
-     /*   boolQuery.should(disMaxQuery()
-                .add(termQuery("legal_entity_name.keyword", content).boost(10))
-                .add(termQuery("holder.name.keyword", content).boost(5.5F))
-                .add(termQuery("staff.name.keyword", content).boost(5.5F))
-                .tieBreaker(0.3F));*/
-
-      /*  boolQuery.should(matchQuery("icp", content).boost(6));
-        boolQuery.should(matchQuery("app_info", content).boost(12));
-        boolQuery.should(matchQuery("company_tm", content).boost(5));*/
-
-
-//        boolQuery.should(termQuery("icp.keyword", content).boost(5));
-//        boolQuery.should(termQuery("app_info.keyword", content).boost(8));
-//        boolQuery.should(termQuery("company_tm.keyword", content).boost(5));
-
-   /*     boolQuery.should(disMaxQuery().add(
-                QueryBuilders.boolQuery()
-                        .should(termQuery("icp.keyword", content).boost(20))
-                        .should(termQuery("app_info.keyword", content).boost(40))
-                        .should(termQuery("company_tm.keyword", content).boost(20))
-                ).tieBreaker(0.3F)
-        );
-*/
-
-     /*   boolQuery.should(disMaxQuery()
-                .add(termQuery("icp.keyword", content).boost(20))
-                .add(termQuery("app_info.keyword", content).boost(40))
-                .add(termQuery("company_tm.keyword", content).boost(20))
-                .tieBreaker(0.3F)
-        );*/
-
-        boolQuery.should(disMaxQuery()
-                .add(disMaxQuery()
-                        .add(termQuery("icp.keyword", content).boost(20))
-                        .add(termQuery("app_info.keyword", content).boost(40))
-                        .add(termQuery("company_tm.keyword", content).boost(20))
-                        .tieBreaker(0.4F)
-                )
-
-                .add(
-                        disMaxQuery()
-                                .add(matchQuery("icp", content).boost(8).minimumShouldMatch("5"))
-                                .add(matchQuery("app_info", content).boost(19).minimumShouldMatch("5"))
-                                .add(matchQuery("company_tm", content).boost(7).minimumShouldMatch("5"))
-                                .tieBreaker(0.3F)
-                )
-
-                /*.add(   //todo 在排序上会有影响   ps. 奔驰
-                        boolQuery()
-                                .should(matchQuery("icp", content).boost(8))
-                                .should(matchQuery("app_info", content).boost(19))
-                                .should(matchQuery("company_tm", content).boost(7))
-
-                                .should(termQuery("icp.keyword", content).boost(20))
-                                .should(termQuery("app_info.keyword", content).boost(40))
-                                .should(termQuery("company_tm.keyword", content).boost(20))
-                                .minimumShouldMatch("5<80%")
-
-                )*/
-
-                /*.add(
-                        disMaxQuery()
-                                .add(boolQuery().should(matchQuery("icp", content).boost(8)).minimumShouldMatch("5<80%"))
-                                .add(boolQuery().should(matchQuery("app_info", content).boost(19)).minimumShouldMatch("5<80%"))
-                                .add(boolQuery().should(matchQuery("company_tm", content).boost(7)).minimumShouldMatch("5<80%"))
-                                .tieBreaker(0.3F)
-
-                )*/
-
-                .tieBreaker(0.4F)
-        );
-
-
-      /*  boolQuery.should(disMaxQuery()
-                .tieBreaker(0.3F)
-
-                .add(matchQuery("icp", content).boost(6).minimumShouldMatch("5<80%"))
-                .add(matchQuery("app_info", content).boost(15).minimumShouldMatch("5<80%"))
-                .add(matchQuery("company_tm", content).boost(5).minimumShouldMatch("5<80%"))
-
-                .add(termQuery("icp.keyword", content).boost(20))
-                .add(termQuery("app_info.keyword", content).boost(40))
-                .add(termQuery("company_tm.keyword", content).boost(30))
-
-        );*/
-
-
-
-        /*  boolQuery.should(disMaxQuery()
-         *//*  .add(
-                        QueryBuilders.boolQuery()
-
-                                .should(matchQuery("icp", content).boost(6))
-                                .should(matchQuery("app_info", content).boost(15))
-                                .should(matchQuery("company_tm", content).boost(5))
-                                .minimumShouldMatch("5<80%")
-                )*//*
-                .add(
-                        QueryBuilders.boolQuery()
-                                .should(termQuery("icp.keyword", content).boost(20))
-                                .should(termQuery("app_info.keyword", content).boost(40))
-                                .should(termQuery("company_tm.keyword", content).boost(30))
-                )
-                .add(
-                        multiMatchQuery(content)
-                                .operator(Operator.AND)
-                                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                                .minimumShouldMatch("5<90%")
-                                .tieBreaker(0.3F)
-                                .field("icp", 6)
-                                .field("app_info", 15)
-                                .field("company_tm", 5)
-                )
-                .tieBreaker(0.3F)
-        );*/
-
-     /*   boolQuery.should(multiMatchQuery(content)
-                .operator(Operator.AND)
-                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                .tieBreaker(0.3F)
-                .field("icp", 3)
-                .field("app_info", 8)
-                .field("company_tm", 1.5F)
-        );*/
-
-        boolQuery.should(multiMatchQuery(content)
-//                .operator(Operator.AND)
-                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                .minimumShouldMatch("5<85%")
-                .tieBreaker(0.3F)
-                .field("cname.value", 16)
-                .field("history_name.value", 12)
-//                .field("holder.name", 6)
-//                .field("staff.name", 6)
-//                .field("legal_entity_name", 6)
-
-        );
-
-
-        BoolQueryBuilder boolQuery2 = QueryBuilders.boolQuery()
-                .filter(termQuery("deleted", "0"))
-                .must(boolQuery);
-        return boolQuery2;
-    }
-
-    private static final Pattern pattern = Pattern.compile("^[a-zA-Z ]*$");
-
-    private static boolean is_pinyin(String str) {
-        return pattern.matcher(str).find();
-    }
-}

+ 0 - 119
src/main/java/com/winhc/task/service/impl/SearchV8SimpServiceImpl.java

@@ -1,119 +0,0 @@
-package com.winhc.task.service.impl;
-
-import com.winhc.task.service.SearchService;
-import com.winhc.task.util.SortUtil;
-import com.winhc.task.dao.SearchDao;
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.elasticsearch.common.lucene.search.function.CombineFunction;
-import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.MultiMatchQueryBuilder;
-import org.elasticsearch.index.query.Operator;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
-import org.elasticsearch.index.query.functionscore.ScriptScoreFunctionBuilder;
-import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
-import org.elasticsearch.search.sort.ScriptSortBuilder;
-import org.springframework.stereotype.Service;
-
-import static org.elasticsearch.index.query.QueryBuilders.*;
-
-/**
- * @author: XuJiakai
- * 2020/12/8 19:12
- */
-@Slf4j
-@Service(SearchV8SimpServiceImpl.index)
-@AllArgsConstructor
-public class SearchV8SimpServiceImpl implements SearchService {
-    private SearchDao searchDao;
-
-    public static final String index = "winhc-company-v8-simp";
-    public static final String type = "company";
-
-    private static final String[] includes_tips = new String[]{"cname.show", "new_cid"};
-    private static final FetchSourceContext fetchSourceContext_tips = new FetchSourceContext(true, includes_tips, null);
-
-
-    @Override
-    public Object tips(String s) {
-        BoolQueryBuilder boolQuery = getBoolQuery(s);
-        ScriptSortBuilder scriptSortBuilder = SortUtil.getInstance().fastSort;
-        FunctionScoreQueryBuilder function = functionScoreQuery(boolQuery, new ScriptScoreFunctionBuilder(scriptSortBuilder.script())
-        )
-                .scoreMode(FiltersFunctionScoreQuery.ScoreMode.SUM)
-                .boostMode(CombineFunction.MULTIPLY);
-
-        Object search = searchDao.search(index, type, function, null, fetchSourceContext_tips, 0, 5);
-        return search;
-    }
-
-    @Override
-    public Object controlGroup(String s) {
-        return searchDao.search(index, type, getBoolQuery(s), null, fetchSourceContext_tips, 0, 10);
-    }
-
-    @Override
-    public Object query(String s, int from, int size) {
-        BoolQueryBuilder boolQuery = getBoolQuery(s);
-        ScriptSortBuilder scriptSortBuilder = SortUtil.getInstance().fastSort;
-        FunctionScoreQueryBuilder function = functionScoreQuery(boolQuery, new ScriptScoreFunctionBuilder(scriptSortBuilder.script())
-        )
-                .boostMode(CombineFunction.MULTIPLY);
-
-        Object search = searchDao.search(index, type, function, null, fetchSourceContext_tips, from, size);
-        return search;
-    }
-
-
-    private BoolQueryBuilder getBoolQuery(String content) {
-        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
-        // 公司现用名*100
-        boolQuery.should(termQuery("cname.value.keyword", content).boost(100));
-        // 公司曾用名*100
-        boolQuery.should(termQuery("history_name.value.keyword", content).boost(100));
-
-
-        // sum(商标全匹配*20,产品全匹配*40)
-        boolQuery.should(
-                boolQuery()
-                        .should(termQuery("app_info.keyword", content).boost(40))
-                        .should(termQuery("company_tm.keyword", content).boost(20))
-
-        );
-
-        // sum(商标*5,产品信息*15,商标全匹配*20,产品全匹配*40)
-        boolQuery.should(
-                boolQuery()
-
-                        .should(matchQuery("app_info", content).boost(15))
-                        .should(matchQuery("company_tm", content).boost(5))
-
-                        .should(termQuery("app_info.keyword", content).boost(40))
-                        .should(termQuery("company_tm.keyword", content).boost(20))
-                        // 加入最小匹配度
-                        .minimumShouldMatch("5<80%")
-        );
-
-        // 0.7 * max(公司现用名*16,曾用名*12)+ 0.3 * sum(公司现用名*16,曾用名*12)
-        MultiMatchQueryBuilder multiMatchQueryBuilder = multiMatchQuery(content)
-                .operator(Operator.AND)
-                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
-                .minimumShouldMatch("5<80%")
-                .tieBreaker(0.3F)
-                .field("cname.value", 16)
-                .field("history_name.value", 12);
-
-        boolQuery.should(multiMatchQueryBuilder);
-
-        BoolQueryBuilder boolQuery2 = boolQuery()
-                // 过deleted非0数据
-                .filter(termQuery("deleted", "0"))
-                .must(boolQuery);
-        return boolQuery2;
-
-    }
-
-}

+ 31 - 2
src/main/java/com/winhc/task/util/BaseUtils.java

@@ -9,11 +9,13 @@ import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hbase.TableName;
+
 import java.text.DecimalFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.*;
 import java.util.function.Function;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -234,7 +236,6 @@ public class BaseUtils {
     }};
 
 
-
     public static String avoidNullString(String s) {
         if (StringUtils.isBlank(s)) return "";
         return s;
@@ -329,14 +330,42 @@ public class BaseUtils {
             return new ArrayList<>();
         } else {
             return Arrays.stream(url.replaceAll("[;\t\n;,。]", ",")
-                            .split(","))
+                    .split(","))
                     .filter(StringUtils::isNotBlank)
                     .distinct()
                     .collect(Collectors.toList());
         }
     }
 
+    // Matches a 4-digit run, optionally wrapped in "(" / ")", that is immediately followed by 年.
+    // The reluctant "{4}?" still consumes exactly four digits.
+    private static final Pattern year_pat_2 = Pattern.compile("\\(?(\\d{4}?)\\)?年");
+    // Group 1 captures from a parenthesised 4-digit run up to and including the next 号.
+    // The leading ".*" is greedy, so the capture starts at the last such parenthesised run in the input.
+    private static final Pattern case_pat3 = Pattern.compile(".*([(\\(]\\d{4}[)\\)][^号]*号.*?)");
+
+    /**
+     * Normalises a case-number string into the form "(yyyy)...号".
+     *
+     * @param str raw case number; may be null or blank
+     * @return the normalised case number, or {@code null} when the input is blank,
+     *         is shorter than 8 characters once spaces are removed, or contains no
+     *         parenthesised 4-digit run followed by 号
+     */
+    public static String case_no_trim(String str) {
+        if (StringUtils.isBlank(str)) return null;
+        // toDBC is defined elsewhere in this class; presumably it converts full-width characters to half-width — TODO confirm.
+        String case_no = toDBC(str);
+        case_no = case_no.replace(" ", "");
+        if (case_no.length() < 8) return null;
+        // NOTE(review): as written, the two paren replace() calls below map a character to itself;
+        // presumably they were meant to map full-width parentheses to ASCII ones — confirm against the original file encoding.
+        // replaceAll("O", "0") turns every capital Latin letter O into the digit 0;
+        // replaceAll("号{2,}", "号") collapses a run of two or more 号 into one.
+        case_no = case_no.replace("(", "(")
+                .replace(")", ")")
+                .replaceAll("O", "0")
+                .replaceAll("号{2,}", "号");
+        // Rewrite a "yyyy年" style year (with or without parentheses) as "(yyyy)".
+        Matcher matcher = year_pat_2.matcher(case_no);
+        if (matcher.find()) {
+            case_no = matcher.replaceAll("\\($1\\)");
+        }
+        // Keep only the captured "(yyyy)...号" portion, dropping whatever precedes it.
+        // The trailing reluctant ".*?" matches nothing under find(), so text after that 号 is left in place.
+        Matcher matcher2 = case_pat3.matcher(case_no);
+        if (matcher2.find()) {
+            case_no = matcher2.replaceAll("$1");
+            return case_no;
+        } else return null;
+    }
+
+
     public static void main(String[] args) throws ParseException {
+        Arrays.asList("(2020)京0101民初17590号号","(2018)苏O1O4民初12422号","2019年苏民申2448号","(2019)苏民申2448")
+                .stream().forEach(c->{
+            System.out.println(case_no_trim(c));
+        });
         String u1 = "www.baijinggame.cn,baijinggame.com,www.baijinggame.com,yuxianweb.cn,www.yuxianweb.cn";
         String u2 = "https://网络.中国,www.yuxianweb.com,baijinggame.cn,yuxianweb.cn,www.yuxianweb.cn";
         String u3 = "https://网络.中国,www.yuxianweb.com,baijinggame.cn,www.baijinggame.cn,baijinggame.com,www.baijinggame.com,yuxianweb.cn,www.yuxianweb.cn";

+ 45 - 0
src/main/java/com/winhc/task/util/ESUtils.java

@@ -0,0 +1,45 @@
+package com.winhc.task.util;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Builds raw JSON request bodies (query DSL strings) for Elasticsearch calls.
+ * <p>
+ * Values supplied by callers are JSON-escaped before being placed in the body, and an
+ * empty or {@code null} list produces an empty {@code terms} array, which matches nothing.
+ *
+ * @author π
+ * @date 2022/1/10 14:13
+ */
+@Slf4j
+public class ESUtils {
+
+    /**
+     * Builds a body that selects documents by {@code _id} and carries a script setting
+     * {@code deleted} to {@code '9'}. Presumably sent to an update-by-query endpoint;
+     * the caller is not visible here.
+     *
+     * @param ids document ids to select; {@code null} entries are skipped
+     * @return the JSON body as a string
+     */
+    public static String updateWenshuById(List<String> ids) {
+        String s1 = toJsonStringList(ids);
+        String dsl = "{\n" +
+                "  \"query\": {\n" +
+                "    \"terms\": {\n" +
+                "      \"_id\": [\n" +
+                "        " + s1 + "\n" +
+                "      ]\n" +
+                "    }\n" +
+                "  },\n" +
+                "  \"script\": {\n" +
+                "    \"inline\": \" ctx._source['deleted'] = '9' \"\n" +
+                "  }\n" +
+                "}";
+        return dsl;
+    }
+
+    /**
+     * Builds a body that selects documents whose {@code case_no.keyword} equals any of the
+     * given values. Presumably sent to a delete-by-query endpoint; the caller is not visible here.
+     *
+     * @param caseList case numbers to select; {@code null} entries are skipped
+     * @return the JSON body as a string
+     */
+    public static String deletedByQuery(List<String> caseList) {
+        String s2 = toJsonStringList(caseList);
+        String dsl = "{\n" +
+                "  \"query\": {\n" +
+                "    \"terms\": {\n" +
+                "      \"case_no.keyword\": [\n" +
+                "        " + s2 + "\n" +
+                "      ]\n" +
+                "    }\n" +
+                "  }\n" +
+                "}";
+        return dsl;
+    }
+
+    /**
+     * Renders values as comma-separated JSON string literals, without the surrounding brackets.
+     * Returns an empty string for a null or empty list, so the caller emits {@code []} rather
+     * than {@code [""]}, which would select documents whose field is the empty string.
+     */
+    private static String toJsonStringList(List<String> values) {
+        if (values == null) {
+            return "";
+        }
+        return values.stream()
+                .filter(v -> v != null)
+                .map(ESUtils::quoteJson)
+                .collect(Collectors.joining(","));
+    }
+
+    /** Wraps a value in double quotes, escaping quotes, backslashes and control characters. */
+    private static String quoteJson(String value) {
+        StringBuilder sb = new StringBuilder(value.length() + 2).append('"');
+        for (int i = 0; i < value.length(); i++) {
+            char c = value.charAt(i);
+            if (c == '"' || c == '\\') {
+                sb.append('\\').append(c);
+            } else if (c < 0x20) {
+                sb.append(String.format("\\u%04x", (int) c));
+            } else {
+                sb.append(c);
+            }
+        }
+        return sb.append('"').toString();
+    }
+}

+ 82 - 0
src/main/resources/application-dev.yml

@@ -0,0 +1,82 @@
+spring:
+  data:
+    mongodb:
+      # NOTE(review): this URI embeds the database user name and password in plain text in a committed file;
+      # consider supplying it from the environment or a secret store instead.
+      uri: mongodb://itslaw:itslaw_168@dds-uf6ff5dfd9aef3641601-pub.mongodb.rds.aliyuncs.com:3717,dds-uf6ff5dfd9aef3642555-pub.mongodb.rds.aliyuncs.com:3717/itslaw?replicaSet=mgset-6501997
+  # Two named Elasticsearch client configurations, es5 and es6, each pointing at its own host.
+  # Apart from name and hostNames the two entries carry the same settings.
+  elasticsearch:
+    bboss:
+      es5:
+        name: es5
+        elasticUser: elastic
+        # NOTE(review): password committed in plain text; consider supplying it from the environment or a secret store.
+        elasticPassword: elastic_168
+        elasticsearch:
+          rest:
+            hostNames: es-cn-0pp0r32zf000ipovd.public.elasticsearch.aliyuncs.com:9200
+          dateFormat: yyyy.MM.dd
+          timeZone: Asia/Shanghai
+          showTemplate: true
+          discoverHost: false
+        dslfile:
+          refreshInterval: -1
+        http:
+          timeoutConnection: 600000
+          timeoutSocket: 600000
+          connectionRequestTimeout: 600000
+          retryTime: -1
+          maxLineLength: -1
+          maxHeaderCount: 200
+          maxTotal: 400
+          defaultMaxPerRoute: 200
+          soReuseAddress: false
+          soKeepAlive: false
+          timeToLive: 3600000
+          keepAlive: 3600000
+          keystore:
+          keyPassword:
+          hostnameVerifier:
+      es6:
+        name: es6
+        elasticUser: elastic
+        # NOTE(review): password committed in plain text; consider supplying it from the environment or a secret store.
+        elasticPassword: elastic_168
+        elasticsearch:
+          rest:
+            hostNames: es-cn-oew22t8bw002iferu.public.elasticsearch.aliyuncs.com:9200
+          dateFormat: yyyy.MM.dd
+          timeZone: Asia/Shanghai
+          showTemplate: true
+          discoverHost: false
+        dslfile:
+          refreshInterval: -1
+        http:
+          timeoutConnection: 600000
+          timeoutSocket: 600000
+          connectionRequestTimeout: 600000
+          retryTime: -1
+          maxLineLength: -1
+          maxHeaderCount: 200
+          maxTotal: 400
+          defaultMaxPerRoute: 200
+          soReuseAddress: false
+          soKeepAlive: false
+          timeToLive: 3600000
+          keepAlive: 3600000
+          keystore:
+          keyPassword:
+          hostnameVerifier:
+
+# HBase client settings; the keys under `config` are literal HBase/ZooKeeper property names.
+hbase:
+  config:
+    hbase.zookeeper.quorum: hb-proxy-pub-uf6m8e1nu4ivp06m5-master1-001.hbase.rds.aliyuncs.com:2181,hb-proxy-pub-uf6m8e1nu4ivp06m5-master2-001.hbase.rds.aliyuncs.com:2181,hb-proxy-pub-uf6m8e1nu4ivp06m5-master3-001.hbase.rds.aliyuncs.com:2181
+    hbase.client.scanner.timeout.period: 120000
+    hbase.client.retries.number: 5
+    hbase.client.pause: 1000
+    hbase.client.max.perserver.tasks: 10
+    hbase.client.max.perregion.tasks: 10
+    hbase.client.keyvalue.maxsize: 524288000
+    hbase.client.ipc.pool.size: 5
+    zookeeper.recovery.retry: 5
+
+  # NOTE(review): `freemarker` is indented under `hbase`, so this key resolves to
+  # hbase.freemarker.template-loader-path. Spring Boot's FreeMarker property is
+  # spring.freemarker.template-loader-path — confirm whether this was meant to sit under `spring`.
+  freemarker:
+    template-loader-path: classpath:/templates/
+
+scheduling:
+  enabled: true

+ 104 - 0
src/main/resources/application-prod.yml

@@ -0,0 +1,104 @@
+spring:
+  data:
+    mongodb:
+      # NOTE(review): this URI embeds the database user name and password in plain text in a committed file;
+      # consider supplying it from the environment or a secret store instead.
+      uri: mongodb://itslaw:itslaw_168@dds-uf6ff5dfd9aef3641.mongodb.rds.aliyuncs.com:3717,dds-uf6ff5dfd9aef3642.mongodb.rds.aliyuncs.com:3717/itslaw?replicaSet=mgset-6501997
+
+  kafka:
+    bootstrap-servers: 192.168.4.237:9092,192.168.4.235:9092,192.168.4.236:9092
+    producer:
+      retries: 3
+      batch-size: 16384
+      buffer-memory: 33554432
+      key-serializer: org.apache.kafka.common.serialization.StringSerializer
+      value-serializer: org.apache.kafka.common.serialization.StringSerializer
+      # acks=0   : the producer does not wait for any response from the server before treating the message as written.
+      # acks=1   : the producer receives a success response as soon as the cluster's leader node has received the message.
+      # acks=all : the producer receives a success response only once all replicating nodes have received the message.
+      acks: 1
+    consumer:
+      # Specifies what the consumer does when it reads a partition that has no offset or whose offset is invalid:
+      # latest (default): the consumer starts from the newest records (those produced after the consumer started).
+      # earliest: the consumer reads the partition's records from the beginning.
+      auto-offset-reset: earliest
+      enable-auto-commit: false
+      key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
+      value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
+      group-id: phoenix_example_group_id
+      max-poll-records: 1
+      # NOTE(review): enable-auto-commit is false above, so this interval presumably has no effect — confirm.
+      auto-commit-interval: 1000
+    listener:
+      # Number of threads to run in the listener container.
+      concurrency: 1
+
+
+  # Two named Elasticsearch client configurations, es5 and es6, each pointing at its own host.
+  # Apart from name and hostNames the two entries carry the same settings.
+  elasticsearch:
+    bboss:
+      es5:
+        name: es5
+        elasticUser: elastic
+        # NOTE(review): password committed in plain text; consider supplying it from the environment or a secret store.
+        elasticPassword: elastic_168
+        elasticsearch:
+          rest:
+            hostNames: es-cn-0pp0r32zf000ipovd.elasticsearch.aliyuncs.com:9200
+          dateFormat: yyyy.MM.dd
+          timeZone: Asia/Shanghai
+          showTemplate: true
+          discoverHost: false
+        dslfile:
+          refreshInterval: -1
+        http:
+          timeoutConnection: 600000
+          timeoutSocket: 600000
+          connectionRequestTimeout: 600000
+          retryTime: -1
+          maxLineLength: -1
+          maxHeaderCount: 200
+          maxTotal: 400
+          defaultMaxPerRoute: 200
+          soReuseAddress: false
+          soKeepAlive: false
+          timeToLive: 3600000
+          keepAlive: 3600000
+          keystore:
+          keyPassword:
+          hostnameVerifier:
+      es6:
+        name: es6
+        elasticUser: elastic
+        # NOTE(review): password committed in plain text; consider supplying it from the environment or a secret store.
+        elasticPassword: elastic_168
+        elasticsearch:
+          rest:
+            hostNames: es-cn-oew22t8bw002iferu.elasticsearch.aliyuncs.com:9200
+          dateFormat: yyyy.MM.dd
+          timeZone: Asia/Shanghai
+          showTemplate: true
+          discoverHost: false
+        dslfile:
+          refreshInterval: -1
+        http:
+          timeoutConnection: 600000
+          timeoutSocket: 600000
+          connectionRequestTimeout: 600000
+          retryTime: -1
+          maxLineLength: -1
+          maxHeaderCount: 200
+          maxTotal: 400
+          defaultMaxPerRoute: 200
+          soReuseAddress: false
+          soKeepAlive: false
+          timeToLive: 3600000
+          keepAlive: 3600000
+          keystore:
+          keyPassword:
+          hostnameVerifier:
+
+# HBase client settings; the key under `config` is a literal HBase property name.
+hbase:
+  config:
+    hbase.zookeeper.quorum: hb-uf6m8e1nu4ivp06m5-master1-001.hbase.rds.aliyuncs.com:2181,hb-uf6m8e1nu4ivp06m5-master2-001.hbase.rds.aliyuncs.com:2181,hb-uf6m8e1nu4ivp06m5-master3-001.hbase.rds.aliyuncs.com:2181
+
+  # NOTE(review): `freemarker` is indented under `hbase`, so this key resolves to
+  # hbase.freemarker.template-loader-path. Spring Boot's FreeMarker property is
+  # spring.freemarker.template-loader-path — confirm whether this was meant to sit under `spring`.
+  freemarker:
+    template-loader-path: classpath:/templates/
+
+
+scheduling:
+  enabled: true

+ 2 - 74
src/main/resources/application.yml

@@ -1,11 +1,11 @@
 spring:
   profiles:
-    active: local
+    active: prod
   jackson:
     date-format: yyyy-MM-dd HH:mm:ss
     time-zone: GMT+8
 server:
-  port: 9999
+  port: 7777
 
 
 odps:
@@ -13,75 +13,3 @@ odps:
   access-key-secret: r6gWoySXC8kSK4qnfKRxEuWJ5uHIiE
   region-id: cn-shanghai
   ding-secret: SECe7b26876f443e77f872b8b10880e39b3c5dfaf44855f1aa3235372bb73698ab6
----
-spring:
-  profiles: local
-
-  data:
-    mongodb:
-      uri: mongodb://itslaw:itslaw_168@dds-uf6ff5dfd9aef3641601-pub.mongodb.rds.aliyuncs.com:3717,dds-uf6ff5dfd9aef3642555-pub.mongodb.rds.aliyuncs.com:3717/itslaw?replicaSet=mgset-6501997
-es:
-  username: elastic
-  password: elastic_168
-  #host: es-cn-oew22t8bw002iferu.public.elasticsearch.aliyuncs.com #new
-  host: es-cn-0pp0r32zf000ipovd.public.elasticsearch.aliyuncs.com
-
-hbase:
-  config:
-    hbase.zookeeper.quorum: hb-proxy-pub-uf6m8e1nu4ivp06m5-master1-001.hbase.rds.aliyuncs.com:2181,hb-proxy-pub-uf6m8e1nu4ivp06m5-master2-001.hbase.rds.aliyuncs.com:2181,hb-proxy-pub-uf6m8e1nu4ivp06m5-master3-001.hbase.rds.aliyuncs.com:2181
-    hbase.client.scanner.timeout.period: 120000
-    hbase.client.retries.number: 5
-    hbase.client.pause: 1000
-    hbase.client.max.perserver.tasks: 10
-    hbase.client.max.perregion.tasks: 10
-    hbase.client.keyvalue.maxsize: 524288000
-    hbase.client.ipc.pool.size: 5
-    zookeeper.recovery.retry: 5
-
-  freemarker:
-    template-loader-path: classpath:/templates/
----
-spring:
-  profiles: prod
-  data:
-    mongodb:
-      uri: mongodb://itslaw:itslaw_168@dds-uf6ff5dfd9aef3641.mongodb.rds.aliyuncs.com:3717,dds-uf6ff5dfd9aef3642.mongodb.rds.aliyuncs.com:3717/itslaw?replicaSet=mgset-6501997
-
-  kafka:
-    bootstrap-servers: 192.168.4.237:9092,192.168.4.235:9092,192.168.4.236:9092
-    producer:
-      retries: 3
-      batch-size: 16384
-      buffer-memory: 33554432
-      key-serializer: org.apache.kafka.common.serialization.StringSerializer
-      value-serializer: org.apache.kafka.common.serialization.StringSerializer
-      # acks=0 : 生产者在成功写入消息之前不会等待任何来自服务器的响应。
-      # acks=1 : 只要集群的首领节点收到消息,生产者就会收到一个来自服务器成功响应。
-      # acks=all :只有当所有参与复制的节点全部收到消息时,生产者才会收到一个来自服务器的成功响应。
-      acks: 1
-    consumer:
-      # 该属性指定了消费者在读取一个没有偏移量的分区或者偏移量无效的情况下该作何处理:
-      # latest(默认值)在偏移量无效的情况下,消费者将从最新的记录开始读取数据(在消费者启动之后生成的记录)
-      # earliest :在偏移量无效的情况下,消费者将从起始位置读取分区的记录
-      auto-offset-reset: earliest
-      enable-auto-commit: false
-      key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
-      value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
-      group-id: phoenix_example_group_id
-      max-poll-records: 1
-      auto-commit-interval: 1000
-    listener:
-      # 在侦听器容器中运行的线程数。
-      concurrency: 1
-
-es:
-  username: elastic
-  password: elastic_168
-  host: es-cn-oew22t8bw002iferu.elasticsearch.aliyuncs.com #es-cn-0pp0r32zf000ipovd.elasticsearch.aliyuncs.com
-
-hbase:
-  config:
-    hbase.zookeeper.quorum: hb-uf6m8e1nu4ivp06m5-master1-001.hbase.rds.aliyuncs.com:2181,hb-uf6m8e1nu4ivp06m5-master2-001.hbase.rds.aliyuncs.com:2181,hb-uf6m8e1nu4ivp06m5-master3-001.hbase.rds.aliyuncs.com:2181
-
-  freemarker:
-    template-loader-path: classpath:/templates/

+ 92 - 92
src/test/java/com/winhc/task/DataWorksSummaryJob.java

@@ -1,92 +1,92 @@
-package com.winhc.task;
-
-import com.alibaba.hologres.client.exception.HoloClientException;
-import com.winhc.task.bean.JobArgs;
-import com.winhc.task.common.SummaryArgs;
-import com.winhc.task.job.CalcSummaryJob;
-import com.winhc.task.util.DateUtils;
-import lombok.extern.slf4j.Slf4j;
-import org.junit.jupiter.api.Test;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-
-import java.io.IOException;
-import java.util.*;
-import java.util.stream.Collectors;
-
-
-@SpringBootTest
-@Slf4j
-public class DataWorksSummaryJob {
-
-    @Autowired
-    CalcSummaryJob calcSummaryJob;
-
-    @Test
-    public void start() throws HoloClientException, IOException {
-
-        //是否同步数据
-        Boolean synData = true;
-        //Boolean synData = false;
-        //过滤集合(集合为空跑全量)
-        //List<String> filter = Arrays.asList("company_holder", "company_staff", "company_change");
-        List<String> filter = new ArrayList<>();
-
-        //确定重跑集合(集合为空跑全量)
-        //List<String> add = Arrays.asList("company_holder");
-        //List<String> add = Arrays.asList("high_tech_enterprise");
-        //List<String> add = Arrays.asList("company_brief_cancel_announcement");
-        //List<String> add = Arrays.asList("online_retailers");
-        List<String> add = Arrays.asList("company_change");
-        //List<String> add = Arrays.asList("cancellation_announcement");
-        //List<String> add = Arrays.asList("bankruptcy_open_case");
-        //List<String> add = Arrays.asList("company_double_random_check_info");
-        //List<String> add = Arrays.asList("company_license", "company_check_info", "company_punishment_info");
-        //List<String> add = Arrays.asList("company_liquidating_info");
-        //List<String> add = Arrays.asList("company_judicial_assistance", "company_brief_cancel_announcement");
-        //List<String> add = Arrays.asList("company_holder", "company_staff");
-        //List<String> add = Arrays.asList("company_holder", "company_staff", "auction_tracking", "restrictions_on_exit");
-        //List<String> add = Arrays.asList("company_equity_info", "company_abnormal_info");
-        //List<String> add = Arrays.asList("company_equity_info");
-        //List<String> add = Arrays.asList("company_illegal_info");
-        //List<String> add = Arrays.asList("company_holder", "company_staff", "company_change");
-        //List<String> add = new ArrayList<>();
-        //List<String> add = Arrays.asList("bankruptcy_open_case", "company_court_open_announcement", "company_court_announcement", "company_send_announcement"
-        //        , "company_court_register", "litigation_mediation", "restrictions_on_exit", "auction_tracking", "property_rights_transaction");
-
-
-        List<String> list = SummaryArgs.SUMMARY_ARGS.keySet().stream()
-                .filter(x -> {
-                    boolean b1 = !filter.contains(x);
-                    boolean b2 = (add.contains(x) || add.isEmpty());
-                    return b1 && b2;
-                }).distinct().collect(Collectors.toList());
-
-        String project = "winhc_ng";
-        String flow = "syn_summary";
-        String taskName = "summary_v9";
-        String nodeId = "700005089066";
-        String holoTable = "test_tmp_xf_sum_v9";
-        String targetIndexPre = "ng_rt_summary";
-        String targetIndexSuf = DateUtils.getYesterday_ymd();
-
-        //初始参数
-        JobArgs jobArgs = JobArgs.builder()
-                .project(project)
-                .flow(flow)
-                .taskName(taskName)
-                .nodeId(nodeId)
-                .holoTable(holoTable)
-                .targetIndexPrefix(targetIndexPre)
-                .targetIndexSuffix(targetIndexSuf)
-                .build();
-        calcSummaryJob.start(list, jobArgs, synData);
-    }
-
-    @Test
-    public void start2() throws HoloClientException, IOException {
-        List<String> list = new ArrayList<>(SummaryArgs.SUMMARY_ARGS.keySet());
-        list.forEach(System.out::println);
-    }
-
-}
+//package com.winhc.task;
+//
+//import com.alibaba.hologres.client.exception.HoloClientException;
+//import com.winhc.task.bean.JobArgs;
+//import com.winhc.task.common.SummaryArgs;
+//import com.winhc.task.job.CalcSummaryJob;
+//import com.winhc.task.util.DateUtils;
+//import lombok.extern.slf4j.Slf4j;
+//import org.junit.jupiter.api.Test;
+//import org.springframework.beans.factory.annotation.Autowired;
+//import org.springframework.boot.test.context.SpringBootTest;
+//
+//import java.io.IOException;
+//import java.util.*;
+//import java.util.stream.Collectors;
+//
+//
+//@SpringBootTest
+//@Slf4j
+//public class DataWorksSummaryJob {
+//
+//    @Autowired
+//    CalcSummaryJob calcSummaryJob;
+//
+//    @Test
+//    public void start() throws HoloClientException, IOException {
+//
+//        //是否同步数据
+//        Boolean synData = true;
+//        //Boolean synData = false;
+//        //过滤集合(集合为空跑全量)
+//        //List<String> filter = Arrays.asList("company_holder", "company_staff", "company_change");
+//        List<String> filter = new ArrayList<>();
+//
+//        //确定重跑集合(集合为空跑全量)
+//        //List<String> add = Arrays.asList("company_holder");
+//        //List<String> add = Arrays.asList("high_tech_enterprise");
+//        //List<String> add = Arrays.asList("company_brief_cancel_announcement");
+//        //List<String> add = Arrays.asList("online_retailers");
+//        List<String> add = Arrays.asList("company_change");
+//        //List<String> add = Arrays.asList("cancellation_announcement");
+//        //List<String> add = Arrays.asList("bankruptcy_open_case");
+//        //List<String> add = Arrays.asList("company_double_random_check_info");
+//        //List<String> add = Arrays.asList("company_license", "company_check_info", "company_punishment_info");
+//        //List<String> add = Arrays.asList("company_liquidating_info");
+//        //List<String> add = Arrays.asList("company_judicial_assistance", "company_brief_cancel_announcement");
+//        //List<String> add = Arrays.asList("company_holder", "company_staff");
+//        //List<String> add = Arrays.asList("company_holder", "company_staff", "auction_tracking", "restrictions_on_exit");
+//        //List<String> add = Arrays.asList("company_equity_info", "company_abnormal_info");
+//        //List<String> add = Arrays.asList("company_equity_info");
+//        //List<String> add = Arrays.asList("company_illegal_info");
+//        //List<String> add = Arrays.asList("company_holder", "company_staff", "company_change");
+//        //List<String> add = new ArrayList<>();
+//        //List<String> add = Arrays.asList("bankruptcy_open_case", "company_court_open_announcement", "company_court_announcement", "company_send_announcement"
+//        //        , "company_court_register", "litigation_mediation", "restrictions_on_exit", "auction_tracking", "property_rights_transaction");
+//
+//
+//        List<String> list = SummaryArgs.SUMMARY_ARGS.keySet().stream()
+//                .filter(x -> {
+//                    boolean b1 = !filter.contains(x);
+//                    boolean b2 = (add.contains(x) || add.isEmpty());
+//                    return b1 && b2;
+//                }).distinct().collect(Collectors.toList());
+//
+//        String project = "winhc_ng";
+//        String flow = "syn_summary";
+//        String taskName = "summary_v9";
+//        String nodeId = "700005089066";
+//        String holoTable = "test_tmp_xf_sum_v9";
+//        String targetIndexPre = "ng_rt_summary";
+//        String targetIndexSuf = DateUtils.getYesterday_ymd();
+//
+//        //初始参数
+//        JobArgs jobArgs = JobArgs.builder()
+//                .project(project)
+//                .flow(flow)
+//                .taskName(taskName)
+//                .nodeId(nodeId)
+//                .holoTable(holoTable)
+//                .targetIndexPrefix(targetIndexPre)
+//                .targetIndexSuffix(targetIndexSuf)
+//                .build();
+//        calcSummaryJob.start(list, jobArgs, synData);
+//    }
+//
+//    @Test
+//    public void start2() throws HoloClientException, IOException {
+//        List<String> list = new ArrayList<>(SummaryArgs.SUMMARY_ARGS.keySet());
+//        list.forEach(System.out::println);
+//    }
+//
+//}

+ 0 - 22
src/test/java/com/winhc/task/EsTest.java

@@ -1,22 +0,0 @@
-package com.winhc.task;
-
-import org.junit.jupiter.api.Test;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-import java.io.IOException;
-
-@SpringBootTest
-public class EsTest {
-    @Autowired
-    com.winhc.task.job.EsIndexJobs esIndexJobs;
-
-    @Test
-    public void start() throws IOException {
-        esIndexJobs.start();
-    }
-
-    @Test
-    public void start2() throws IOException {
-        esIndexJobs.start2();
-    }
-}