|
@@ -0,0 +1,413 @@
|
|
|
+package com.winhc.task;
|
|
|
+
|
|
|
+import cn.hutool.core.io.FileUtil;
|
|
|
+import com.alibaba.fastjson.JSON;
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.alibaba.hologres.client.HoloClient;
|
|
|
+import com.alibaba.hologres.client.exception.HoloClientException;
|
|
|
+import com.aliyun.odps.Column;
|
|
|
+import com.winhc.task.bean.Summary;
|
|
|
+import com.winhc.task.common.ArgsCompanyJob;
|
|
|
+import com.winhc.task.common.Constant;
|
|
|
+import com.winhc.task.common.SummaryArgs;
|
|
|
+import com.winhc.task.job.EsScanSummaryJob;
|
|
|
+import com.winhc.task.util.*;
|
|
|
+import lombok.val;
|
|
|
+import org.apache.calcite.avatica.proto.Common;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.apache.hadoop.hbase.TableName;
|
|
|
+import org.apache.hadoop.hbase.client.*;
|
|
|
+import org.apache.hadoop.hbase.filter.PageFilter;
|
|
|
+import org.elasticsearch.action.search.SearchRequest;
|
|
|
+import org.elasticsearch.action.search.SearchResponse;
|
|
|
+import org.elasticsearch.client.RestHighLevelClient;
|
|
|
+import org.elasticsearch.index.query.WrapperQueryBuilder;
|
|
|
+import org.elasticsearch.search.SearchHit;
|
|
|
+import org.elasticsearch.search.builder.SearchSourceBuilder;
|
|
|
+import org.junit.jupiter.api.Test;
|
|
|
+import org.postgresql.model.TableSchema;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.beans.factory.annotation.Qualifier;
|
|
|
+import org.springframework.boot.test.context.SpringBootTest;
|
|
|
+
|
|
|
+import java.io.File;
|
|
|
+import java.io.FileFilter;
|
|
|
+import java.io.IOException;
|
|
|
+import java.nio.charset.StandardCharsets;
|
|
|
+import java.util.*;
|
|
|
+import java.util.concurrent.CompletableFuture;
|
|
|
+import java.util.function.Function;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+import java.util.stream.Stream;
|
|
|
+import java.util.stream.StreamSupport;
|
|
|
+
|
|
|
+import static com.winhc.task.common.Constant.hbase_scan_v8;
|
|
|
+import static com.winhc.task.common.Constant.tns_out;
|
|
|
+
|
|
|
+
|
|
|
+@SpringBootTest
|
|
|
+public class SynMongoCompanySummary {
|
|
|
+ @Autowired
|
|
|
+ EsScanSummaryJob esScanSummaryJob;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ Connection connection;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ @Qualifier(value = "v6")
|
|
|
+ RestHighLevelClient getClient;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ @Qualifier(value = "v5")
|
|
|
+ RestHighLevelClient getOldClient;
|
|
|
+
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void start() throws IOException {
|
|
|
+ esScanSummaryJob.start();
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void start2() throws Exception {
|
|
|
+
|
|
|
+ //计算所有ids
|
|
|
+ esScanSummaryJob.start();
|
|
|
+ //v9 holo->holo 聚合v8 rowkey
|
|
|
+ holo();
|
|
|
+ //v8 es->holo 聚合v9 rowkey
|
|
|
+ es2HoloV8();
|
|
|
+ //v8&v9 holo->hbase->excel 数据导出excel
|
|
|
+ holoAndHbase2Excel();
|
|
|
+ //v8&v9 hbase->excel 子表scan 数据导出excel
|
|
|
+ scanHbase2Excel();
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void holo() throws Exception {
|
|
|
+ List<String> tns = SummaryArgs.SUMMARY_ARGS.keySet()
|
|
|
+ .stream()
|
|
|
+ .filter(tns_out::contains)
|
|
|
+ .distinct()
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ String resTable = Constant.holo_res_tab;
|
|
|
+ //清空表
|
|
|
+ HoloClient holoClient = HoloUtils.init();
|
|
|
+ HoloUtils.exexSql(holoClient, "truncate table " + resTable);
|
|
|
+ holoClient.close();
|
|
|
+ tns.forEach(tn -> {
|
|
|
+ try {
|
|
|
+ rowkey2HoloV9(tn, resTable);
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public void rowkey2HoloV9(String tn, String resTable) throws HoloClientException {
|
|
|
+
|
|
|
+ SummaryArgs s = SummaryArgs.SUMMARY_ARGS.get(tn);
|
|
|
+ String sql = "INSERT INTO " + resTable + "\n" +
|
|
|
+ "SELECT\n" +
|
|
|
+ " rowkey, '&tn' as tn\n" +
|
|
|
+ "FROM\n" +
|
|
|
+ " &tableName\n" +
|
|
|
+ "WHERE\n" +
|
|
|
+ " &condition\n" +
|
|
|
+ "GROUP BY rowkey";
|
|
|
+
|
|
|
+ String ids = Constant.all_ids.stream().distinct()
|
|
|
+ .collect(Collectors.joining("','", "'", "'"));
|
|
|
+ String condition = s.getArgsInfo().stream()
|
|
|
+ .map(x -> x.getFilterField() + " && " + "ARRAY[" + ids + "]")
|
|
|
+ .collect(Collectors.joining(" or "));
|
|
|
+
|
|
|
+ HoloClient holoClient = HoloUtils.init();
|
|
|
+ String tableName = "ng_rt_" + tn;
|
|
|
+ String calc_sql = sql.replaceAll("&condition", condition)
|
|
|
+ .replaceAll("&tn", tn)
|
|
|
+ .replaceAll("&tableName", tableName);
|
|
|
+ System.out.println(calc_sql);
|
|
|
+ HoloUtils.exexSql(holoClient, calc_sql);
|
|
|
+ holoClient.close();
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void scanHbase2Excel() throws Exception {
|
|
|
+ List<String> ids = Constant.all_ids;
|
|
|
+ HoloClient holoClient = HoloUtils.init();
|
|
|
+ //补充年报rowkey
|
|
|
+ String tmpSql = "SELECT * FROM " + Constant.holo_res_tab + " where tn ='company_annual_report'";
|
|
|
+ List<Summary> pList = HoloUtils.exec(holoClient, tmpSql, Summary.class);
|
|
|
+ List<String> id_addr = pList.stream().map(Summary::getRowkey).distinct().collect(Collectors.toList());
|
|
|
+ ids.addAll(id_addr);
|
|
|
+ holoClient.close();
|
|
|
+
|
|
|
+ val futures = new ArrayList<CompletableFuture<List<JSONObject>>>();
|
|
|
+ Stream.concat(Constant.hbase_scan_v9.stream(), hbase_scan_v8.stream()).forEach(tn -> {
|
|
|
+ ids.forEach(rowkey -> {
|
|
|
+ CompletableFuture<List<JSONObject>> rew = CompletableFuture.supplyAsync(() -> {
|
|
|
+ List<JSONObject> re;
|
|
|
+ String tableName = hbase_scan_v8.contains(tn) ? "NG_" + tn.toUpperCase(Locale.ROOT) : "NG_RT_" + tn.toUpperCase(Locale.ROOT);
|
|
|
+ try (val table = connection.getTable(TableName.valueOf(tableName))) {
|
|
|
+ Scan scan = new Scan();
|
|
|
+ scan.setRowPrefixFilter(rowkey.getBytes());
|
|
|
+ PageFilter pageFilter = new PageFilter(100);
|
|
|
+ scan.setFilter(pageFilter);
|
|
|
+ ResultScanner scanner = table.getScanner(scan);
|
|
|
+ re = StreamSupport.stream(scanner.spliterator(), false)
|
|
|
+ .filter(result -> result != null && !result.isEmpty())
|
|
|
+ .map(BaseUtils::toJSONObjectLowerCase)
|
|
|
+ .map(j -> j.fluentPut("tn", tn))
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ throw new RuntimeException("fetchHbase error" + e.getMessage(), e);
|
|
|
+ }
|
|
|
+ return re;
|
|
|
+ }, BaseUtils.COMMON_POOL);
|
|
|
+ futures.add(rew);
|
|
|
+ });
|
|
|
+ });
|
|
|
+
|
|
|
+ CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).get();
|
|
|
+ List<JSONObject> resList = futures.stream().flatMap(x -> {
|
|
|
+ try {
|
|
|
+ return x.get().stream();
|
|
|
+ } catch (Exception e) {
|
|
|
+ throw new RuntimeException(" 222, fetch data error" + e.getMessage(), e);
|
|
|
+ }
|
|
|
+ }).filter(Objects::nonNull).collect(Collectors.toList());
|
|
|
+
|
|
|
+ Map<String, List<JSONObject>> dataList = resList.stream()
|
|
|
+ .collect(Collectors.groupingBy(o -> o.getString("tn")));
|
|
|
+
|
|
|
+ dataList.forEach(this::saveExcel);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void es2HoloV8() throws Exception {
|
|
|
+ List<String> v8_tns = Arrays.asList(
|
|
|
+ "company_ipr_pledge",
|
|
|
+ "company_bid",
|
|
|
+ "company_employment",
|
|
|
+ "company_certificate",
|
|
|
+ "company_customs_credit",
|
|
|
+ "company_bond",
|
|
|
+ "company_tele_license",
|
|
|
+ "company_weibo",
|
|
|
+ "company_wechat");
|
|
|
+ v8_tns.forEach(tn -> {
|
|
|
+ try {
|
|
|
+ calc_v8(tn);
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ public void calc_v8(String tn) throws Exception {
|
|
|
+
|
|
|
+ String dsl = "{\n" +
|
|
|
+ " \"query\": {\n" +
|
|
|
+ " \"bool\": {\n" +
|
|
|
+ " \"should\": [\n" +
|
|
|
+ " &condition\n" +
|
|
|
+ " ],\n" +
|
|
|
+ " \"minimum_should_match\": 1\n" +
|
|
|
+ " }\n" +
|
|
|
+ " }\n" +
|
|
|
+ "}";
|
|
|
+
|
|
|
+ List<JSONObject> collect = Constant.es_filters.get(tn).stream()
|
|
|
+ .map(col -> new JSONObject()
|
|
|
+ .fluentPut("terms", new JSONObject()
|
|
|
+ .fluentPut(col, JSONArray.parseArray(JSON.toJSONString(Constant.all_ids))))).collect(Collectors.toList());
|
|
|
+ String ds1 = dsl.replaceAll("&condition", JSONObject.toJSONString(collect));
|
|
|
+
|
|
|
+ SearchRequest searchRequest = new SearchRequest("winhc_index_" + tn).types("_doc");
|
|
|
+ SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
|
|
|
+ searchSourceBuilder.size(10000);
|
|
|
+ if (StringUtils.isNotEmpty(ds1)) {
|
|
|
+ WrapperQueryBuilder query = QueryBuilderUtils.getQuery(JSONObject.parseObject(ds1));
|
|
|
+ searchSourceBuilder.query(query);
|
|
|
+ }
|
|
|
+ searchRequest.source(searchSourceBuilder);
|
|
|
+ SearchResponse searchResponse = getClient.search(searchRequest);
|
|
|
+ HoloClient holoClient = HoloUtils.init();
|
|
|
+ TableSchema tableSchema = holoClient.getTableSchema(Constant.holo_res_tab);
|
|
|
+ List<com.alibaba.hologres.client.Put> rePut = Arrays.stream(searchResponse.getHits().getHits())
|
|
|
+ .map(SearchHit::getId).distinct()
|
|
|
+ .map(rowkey -> {
|
|
|
+ com.alibaba.hologres.client.Put put = new com.alibaba.hologres.client.Put(tableSchema);
|
|
|
+ put.setObject("tn", tn);
|
|
|
+ put.setObject("rowkey", rowkey);
|
|
|
+ return put;
|
|
|
+ })
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+ holoClient.put(rePut);
|
|
|
+ holoClient.close();
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void holoAndHbase2Excel() throws Exception {
|
|
|
+ HoloClient holoClient = HoloUtils.init();
|
|
|
+ String tmpSql = "SELECT * FROM " + Constant.holo_res_tab + "";
|
|
|
+ List<Summary> pList = HoloUtils.exec(holoClient, tmpSql, Summary.class);
|
|
|
+ holoClient.close();
|
|
|
+
|
|
|
+ Map<String, List<Summary>> gList = pList.stream().collect(Collectors.groupingBy(Summary::getTn));
|
|
|
+ val futures = new ArrayList<CompletableFuture<List<JSONObject>>>();
|
|
|
+ gList.forEach((tn, keys) -> {
|
|
|
+ if (!tn.equals("wenshu_detail_v2")) {
|
|
|
+ futures.add(fetchHbase(tn, keys));
|
|
|
+ } else {
|
|
|
+ futures.add(fetchES(tn, keys));
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).get();
|
|
|
+ futures.forEach(x -> {
|
|
|
+ try {
|
|
|
+ x.get();
|
|
|
+ } catch (Exception e) {
|
|
|
+ throw new RuntimeException("error 1111 :" + e.getMessage(), e);
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ private CompletableFuture<List<JSONObject>> fetchHbase(String tn, List<Summary> keys) {
|
|
|
+ String tableName = Constant.v8_list.contains(tn) ? "NG_" + tn.toUpperCase(Locale.ROOT) : "NG_RT_" + tn.toUpperCase(Locale.ROOT);
|
|
|
+ return CompletableFuture.supplyAsync(() -> {
|
|
|
+ List<JSONObject> re = new ArrayList<>();
|
|
|
+ val gs = keys.stream().map(Summary::getRowkey).
|
|
|
+ distinct().map(vk -> vk.getBytes(StandardCharsets.UTF_8))
|
|
|
+ .map(Get::new).collect(Collectors.toList());
|
|
|
+ try (val table = connection.getTable(TableName.valueOf(tableName))) {
|
|
|
+ val rs = table.get(gs);
|
|
|
+ if (rs != null) {
|
|
|
+ re = Stream.of(rs)
|
|
|
+ .filter(result -> result != null && !result.isEmpty())
|
|
|
+ .map(BaseUtils::toJSONObjectLowerCase)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ throw new RuntimeException("fetchHbase error" + e.getMessage(), e);
|
|
|
+ }
|
|
|
+ saveExcel(tn, re);
|
|
|
+ return re;
|
|
|
+ }, BaseUtils.COMMON_POOL);
|
|
|
+ }
|
|
|
+
|
|
|
+ private CompletableFuture<List<JSONObject>> fetchES(String tn, List<Summary> keys) {
|
|
|
+ return CompletableFuture.supplyAsync(() -> {
|
|
|
+ List<JSONObject> re = new ArrayList<>();
|
|
|
+ String dsl = "{\n" +
|
|
|
+ " \"query\": {\n" +
|
|
|
+ " \"terms\": {\n" +
|
|
|
+ " \"_id\": [\n" +
|
|
|
+ " \"&ids\"\n" +
|
|
|
+ " ]\n" +
|
|
|
+ " }\n" +
|
|
|
+ " }\n" +
|
|
|
+ "}";
|
|
|
+
|
|
|
+ String ds1 = dsl.replaceAll("&ids", keys.stream().map(Summary::getRowkey)
|
|
|
+ .collect(Collectors.joining("\",\"", "", "")));
|
|
|
+ SearchRequest searchRequest = new SearchRequest("wenshu_detail2").types("wenshu_detail_type");
|
|
|
+ SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
|
|
|
+ searchSourceBuilder.size(10000);
|
|
|
+ if (StringUtils.isNotEmpty(ds1)) {
|
|
|
+ WrapperQueryBuilder query = QueryBuilderUtils.getQuery(JSONObject.parseObject(ds1));
|
|
|
+ searchSourceBuilder.query(query);
|
|
|
+ }
|
|
|
+ searchRequest.source(searchSourceBuilder);
|
|
|
+ try {
|
|
|
+ SearchResponse searchResponse = getOldClient.search(searchRequest);
|
|
|
+ re = Arrays.stream(searchResponse.getHits().getHits()).map(x -> {
|
|
|
+ Map<String, Object> sourceAsMap = x.getSourceAsMap();
|
|
|
+ String id = x.getId();
|
|
|
+ sourceAsMap.put("rowkey", id);
|
|
|
+ JSONObject re1 = new JSONObject(sourceAsMap);
|
|
|
+ return re1;
|
|
|
+ }).collect(Collectors.toList());
|
|
|
+ } catch (IOException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ saveExcel(tn, re);
|
|
|
+ return re;
|
|
|
+ }, BaseUtils.COMMON_POOL);
|
|
|
+ }
|
|
|
+
|
|
|
+ private void saveExcel(String tn, List<JSONObject> re) {
|
|
|
+ String tableName = Constant.v8_list.contains(tn) ? "inc_ads_" + tn : "inc_ads_" + tn + "_v9";
|
|
|
+ List<String> filter_list = new ArrayList<>();
|
|
|
+ if (Constant.tn_cols_filters.containsKey(tn)) {
|
|
|
+ filter_list = Constant.tn_cols_filters.get(tn);
|
|
|
+ }
|
|
|
+ final List<String> finalFilter_list = filter_list;
|
|
|
+ List<String> cols = OdpsUtils.getOdps()
|
|
|
+ .tables()
|
|
|
+ .get(tableName)
|
|
|
+ .getSchema()
|
|
|
+ .getColumns()
|
|
|
+ .stream()
|
|
|
+ .map(Column::getName)
|
|
|
+ .distinct()
|
|
|
+ .filter(x -> !finalFilter_list.contains(x))
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+ List<List<String>> head = cols.stream()
|
|
|
+ .map(Collections::singletonList)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+ List<List<String>> dataList = re.stream().map(d -> {
|
|
|
+ List<String> dataRow = new ArrayList<>();
|
|
|
+ cols.forEach(c -> {
|
|
|
+ dataRow.add(d.getString(c));
|
|
|
+ });
|
|
|
+ return dataRow;
|
|
|
+ }).collect(Collectors.toList());
|
|
|
+
|
|
|
+ EasyExcelUtil easyExcelUtil = new EasyExcelUtil();
|
|
|
+
|
|
|
+ String path = "D:\\tmp\\test4\\" + tn + ".xlsx";
|
|
|
+ easyExcelUtil.init(path, "sheet1", head);
|
|
|
+ easyExcelUtil.doExportExcel(dataList);
|
|
|
+ //关闭流
|
|
|
+ easyExcelUtil.finish();
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void compare() throws Exception {
|
|
|
+ List<File> fileList = FileUtil.loopFiles(new File("D:\\tmp\\test4"), new FileFilter() {
|
|
|
+ @Override
|
|
|
+ public boolean accept(File pathname) {
|
|
|
+ if (pathname.getName().endsWith(".xlsx")) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ });
|
|
|
+ List<String> out_tns = fileList.stream().map(d -> d.getName().replace(".xlsx", ""))
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ System.out.println(out_tns);
|
|
|
+// List<String> tmp_tns = tns_out.stream().distinct().collect(Collectors.toList());
|
|
|
+// System.out.println(tmp_tns);
|
|
|
+
|
|
|
+ Map<String, Long> collect = tns_out.stream().collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
|
|
|
+ System.out.println(collect);
|
|
|
+
|
|
|
+
|
|
|
+ String loss_tns = tns_out.stream().filter(x -> !out_tns.contains(x)).collect(Collectors.joining("\n"));
|
|
|
+ System.out.println(loss_tns);
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+}
|