فهرست منبع

feat: 调整数据比对结构

许家凯 2 سال پیش
والد
کامیت
8f1671de17
2 فایلهای تغییر یافته به همراه 11 افزوده شده و 1 حذف شده
  1. 4 0
      handle/pull_sample_data.py
  2. 7 1
      spider/cpa_agg.py

+ 4 - 0
handle/pull_sample_data.py

@@ -120,8 +120,10 @@ def pull_by_max(size=100000):
 
     all_ds = get_partition_ds(tab='out_winhc_data_analysis_pull_data', project='winhc_ng')
     if latest_ds not in all_ds:
+        log.info("exec sql: {}".format(sql))
         instance = odps_sdk.run_sql(sql)
         instance.wait_for_success()
+    log.info("开始推送数据...")
 
     with odps_sdk.execute_sql(
             'select * from out_winhc_data_analysis_pull_data where ds = ' + latest_ds + '').open_reader(
@@ -151,6 +153,8 @@ def pull_by_max(size=100000):
             # print(map_2_json_str(ele))
             r_sdk.send_by_fanout(RABBITMQ_TOPIC, json.dumps(ele, ensure_ascii=False).encode())
 
+    log.info('数据推送完成.')
+
     pass
 
 

+ 7 - 1
spider/cpa_agg.py

@@ -29,13 +29,18 @@ holo_client = all_client.get_holo_client(db='winhc_biz')
 HOLO_TABLE_NAME = 'public.ads_waa_dim_info'
 
 
-def get_max_data(data: list, key: str):
+def get_max_data(data: list, key: str, exclude_product_name: list = ['winhc']):
     max_data = None
     for i in data:
         tmp_v = json_path(i, key)
         if tmp_v is None:
             continue
         product_name = i['competitor_product_name']
+
+        if product_name in exclude_product_name:
+            continue
+            pass
+
         if max_data is None:
             max_data = (tmp_v, product_name)
         else:
@@ -150,6 +155,7 @@ def main(max_round: int = 2, interval_of_sed: int = 300):
             break
 
         try:
+            log.info('{},第{}遍轮循结束.'.format(ds, round_num))
             time.sleep(interval_of_sed)
             pass
         except: