@@ -9,9 +9,9 @@ from threading import Thread
 from utils.datetime_utils import datetime_format
 from log import get_log
 from sdk.WinhcAllClient import get_all_client
-from utils.datetime_utils import get_ds, get_now, datetime_format_transform
+from utils.datetime_utils import get_ds, get_now, datetime_format_transform, get_yesterday_ds
 from utils import map_2_json_str, json_path
-from utils.base_utils import tuple_max
+from utils.base_utils import tuple_max, get_str_intersection
 from utils.mysql_utils import insert_many
 from utils.xxl_queue import xxl_queue
 import re
@@ -28,6 +28,7 @@ log = get_log('cpa_agg')

 holo_client = all_client.get_holo_client(db='winhc_biz')
 HOLO_TABLE_NAME = 'public.ads_waa_dim_info'
+all_sdk = get_all_client()


 def get_max_data(data: list, key: str, exclude_product_name: list = ['winhc']):
@@ -59,7 +60,7 @@ def get_all_data_by_item(data: list, key):
     return result_data


-def data_transform(data: list):
+def data_transform(data: list, rabbit_mq):
     log.info('input data: {}'.format(data))
     deleted_key = [i['_id'] for i in data][0]
     deleted_key = deleted_key[:deleted_key.rfind('_')]
@@ -135,7 +136,11 @@ def data_transform(data: list):
     log.info('output data: {}'.format(li))

     if li is not None and len(li) > 0:
-        insert_many(li, holo_keys, HOLO_TABLE_NAME, holo_client)
+        # insert_many(li, holo_keys, HOLO_TABLE_NAME, holo_client)
+        li = [tuple(i.values()) for i in li]
+        for i in li:
+            rabbit_mq.send_by_fanout("cpa_insert_holo", json.dumps(i, ensure_ascii=False).encode())
+        pass

     del_num = 0
     try:
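The hunk above swaps the synchronous `insert_many` write for per-row messages on the `cpa_insert_holo` fanout exchange, each row serialized as a JSON array of its column values. The project's `send_by_fanout` SDK call and the downstream consumer are not part of this diff; the sketch below is only an illustration, using plain `pika` and assumed connection details, of how such a consumer could restore the tuples and perform the Holo insert that used to happen inline.

```python
# Illustrative only: the diff publishes rows via the project's RabbitMQ SDK
# (all_sdk.get_rabbit_mq_sdk().send_by_fanout); this stand-in consumer uses
# plain pika and assumed connection details to show the receiving side.
import json
import pika

def consume_cpa_rows(holo_insert, host: str = "localhost"):
    """Bind a throwaway queue to the cpa_insert_holo fanout exchange and hand
    each decoded row tuple to `holo_insert` (e.g. a wrapper around insert_many)."""
    conn = pika.BlockingConnection(pika.ConnectionParameters(host=host))
    channel = conn.channel()
    channel.exchange_declare(exchange="cpa_insert_holo", exchange_type="fanout")
    q = channel.queue_declare(queue="", exclusive=True)  # broker-named queue
    channel.queue_bind(exchange="cpa_insert_holo", queue=q.method.queue)

    def on_message(ch, method, properties, body):
        row = tuple(json.loads(body))  # mirrors json.dumps(tuple(i.values())) above
        holo_insert(row)
        ch.basic_ack(delivery_tag=method.delivery_tag)

    channel.basic_consume(queue=q.method.queue, on_message_callback=on_message)
    channel.start_consuming()
```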
@@ -153,13 +158,19 @@ q = queue.Queue(5000)

 class Work(Thread):
     def run(self):
+        r_sdk = all_sdk.get_rabbit_mq_sdk()
         while True:
-            data_transform(q.get())
+            data_transform(q.get(), r_sdk)


 today_ds = get_ds()
+yesterday_ds = get_yesterday_ds()
+
+# scan_ds = today_ds[:-2]
+scan_ds = get_str_intersection(today_ds, yesterday_ds)
+

-scan_ds = today_ds[:-2]
+# scan_ds = '2022'


 def overwrite_handle(key, obj_list):
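`get_str_intersection` comes from `utils.base_utils` and is not shown in this diff. Read together with the old `scan_ds = today_ds[:-2]` line, it presumably returns the common prefix of today's and yesterday's `ds` strings, so the `"^" + scan_ds` regex used in the scan loop keeps matching documents from both days around midnight. A minimal sketch of that assumed behaviour:

```python
# Assumed behaviour of utils.base_utils.get_str_intersection (not in this diff):
# the longest common prefix of the two date strings.
import os

def get_str_intersection_sketch(a: str, b: str) -> str:
    return os.path.commonprefix([a, b])

assert get_str_intersection_sketch("20220705", "20220704") == "2022070"
assert get_str_intersection_sketch("20220701", "20220630") == "20220"  # month rollover widens the scan
```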
@@ -176,8 +187,11 @@ def overwrite_handle(key, obj_list):
     pass


+import tqdm
+
+
 def main(max_round: int = 2, interval_of_sed: int = 300):
-    thread_num = 10
+    thread_num = 20

     for i in range(thread_num):
         w = Work()
@@ -189,10 +203,11 @@ def main(max_round: int = 2, interval_of_sed: int = 300):
     while True:
         round_num += 1

-        log.info('{},第{}遍轮循...'.format(scan_ds, round_num))
+        print('{},第{}遍轮循...'.format(scan_ds, round_num))
         xxl_q = xxl_queue(pop_threshold=2, overwrite_handle=overwrite_handle)
         # for i in col.find({"_id": {"$regex": "^" + ds}}).batch_size(200):
-        for i in col.find({"_id": {"$regex": "^" + scan_ds}}).batch_size(200):
+
+        for i in tqdm.tqdm(col.find({"_id": {"$regex": "^" + scan_ds}}).batch_size(200)):
         # for i in col.find().batch_size(200):
             _id = i['_id']
             key = _id[:_id.rfind('_')]
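The scan itself is unchanged apart from the `tqdm` progress bar: documents are still pulled by `_id` prefix and grouped on everything before the last underscore, with `xxl_queue(pop_threshold=2, ...)` presumably releasing a group once two documents share a key. A small illustration with a hypothetical `_id` layout:

```python
# Hypothetical _id layout (date_company_product) to show what the key grouping
# above does; the real suffix semantics are not visible in this diff.
docs = [
    {"_id": "20220705_123456_winhc"},
    {"_id": "20220705_123456_other"},
]
keys = {d["_id"][:d["_id"].rfind("_")] for d in docs}
# Both documents collapse onto one key, which is what lets xxl_queue pair them.
assert keys == {"20220705_123456"}
```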
@@ -205,7 +220,7 @@ def main(max_round: int = 2, interval_of_sed: int = 300):
             break

         try:
-            log.info('{},第{}遍轮循结束.'.format(scan_ds, round_num))
+            print('{},第{}遍轮循结束.'.format(scan_ds, round_num))
             time.sleep(interval_of_sed)
             pass
         except: