Преглед на файлове

Merge branch 'master' of http://139.224.213.4:3000/bigdata/DataWorks-flow-touch

许家凯 преди 4 години
родител
ревизия
e29e6e240a

src/main/java/com/winhc/dataworks/flow/touch/script/ads_simple_ods.sql → src/main/java/com/winhc/dataworks/flow/touch/script/inc_cid_sql/ads_simple_ods.sql


+ 91 - 0
src/main/java/com/winhc/dataworks/flow/touch/script/inc_cids_sql/ads_flat_fold_ods.sql

@@ -0,0 +1,91 @@
+--odps sql
+--********************************************************************--
+--author:yyn
+--create time:2020-06-24 14:18:55
+--需打平并再折叠的维度增量数据插入、复制方案
+--${PROJECT},空间名
+--${DIM_TABLE},维度表,如:company_icp,下面会在各个层次添加前缀,如:winhc_eci_dev.inc_ads_company_icp
+--${DS},当天分区
+--${DIM_COLUMS},维度表的字段集,不含rowkey、cid、cids、new_cid、new_cids
+--${DIM_LIST_COLUMS},维度LIST表的字段集,不含rowkey、cid、cids、new_cid、new_cids
+--${DUPLI_COLS},维度表的去重字段集
+--********************************************************************--
+-- Step 1: flatten the incremental ODS dimension rows. The cids column is
+-- split on ';' and every element becomes its own output row (column cid).
+INSERT OVERWRITE TABLE ${PROJECT}.inc_tmp_${DIM_TABLE}
+SELECT
+    cid
+    ,${DIM_COLUMS}
+FROM ${PROJECT}.inc_ods_${DIM_TABLE} a
+LATERAL VIEW EXPLODE(SPLIT(cids, ';')) b AS cid
+-- Rows whose cids is NULL or blank are skipped entirely.
+WHERE cids IS NOT NULL
+  AND TRIM(cids) <> ''
+  AND ds >= ${DS}
+;
+-- Step 2: distinct (cid, current_cid) pairs from the incremental company
+-- table, partitions from ${DS} onwards; current_cid is exposed as new_cid.
+WITH MAPPING AS (
+    SELECT
+        cid
+        ,current_cid AS new_cid
+    FROM ${PROJECT}.inc_ods_company
+    WHERE ds >= ${DS}
+      AND cid IS NOT NULL
+      AND current_cid IS NOT NULL
+    GROUP BY
+        cid
+        ,current_cid
+)
+-- Step 3: fill the ${DS} partition of the _list table from two sources
+-- that are concatenated with UNION ALL.
+INSERT OVERWRITE TABLE ${PROJECT}.inc_ads_${DIM_TABLE}_list PARTITION (ds = ${DS})
+-- Source 1: the flattened rows from the tmp table. Each cid is replaced by
+-- its mapped new_cid when MAPPING has one (otherwise the cid itself is
+-- used), then only the newest row per ${DUPLI_COLS} is kept.
+SELECT
+    CONCAT_WS('_', new_cid, id) AS rowkey
+    ,CAST(new_cid AS STRING) AS new_cid
+    ,cid
+    ,${DIM_LIST_COLUMS}
+FROM (
+    SELECT
+        *
+        ,ROW_NUMBER() OVER (
+            PARTITION BY ${DUPLI_COLS}
+            ORDER BY update_time DESC
+        ) AS num
+    FROM (
+        SELECT
+            c.*
+            ,COALESCE(d.new_cid, c.cid) AS new_cid
+        FROM ${PROJECT}.inc_tmp_${DIM_TABLE} c
+        LEFT JOIN MAPPING d
+            ON c.cid = d.cid
+    ) e
+) f
+WHERE num = 1
+UNION ALL
+-- Source 2: rows already present in the incremental and full _list tables
+-- whose new_cid equals a cid in MAPPING. They are copied under the
+-- mapping's new_cid, again keeping the newest row per ${DUPLI_COLS}.
+-- NOTE(review): the first inner query reads the same _list table this
+-- statement overwrites, with ds >= ${DS} -- confirm this is intended.
+SELECT
+    CONCAT_WS('_', new_cid, id) AS rowkey
+    ,CAST(new_cid AS STRING) AS new_cid
+    ,cid
+    ,${DIM_LIST_COLUMS}
+FROM (
+    SELECT
+        a.new_cid
+        ,${DIM_LIST_COLUMS}
+        ,a.cid
+        ,ROW_NUMBER() OVER (
+            PARTITION BY ${DUPLI_COLS}
+            ORDER BY update_time DESC
+        ) AS num
+    FROM MAPPING a
+    JOIN (
+        SELECT
+            new_cid AS cid
+            ,${DIM_LIST_COLUMS}
+        FROM ${PROJECT}.inc_ads_${DIM_TABLE}_list
+        WHERE ds >= ${DS}
+        UNION ALL
+        SELECT
+            new_cid AS cid
+            ,${DIM_LIST_COLUMS}
+        FROM ${PROJECT}.ads_${DIM_TABLE}_list
+        WHERE ds >= ${DS}
+    ) b
+        ON a.cid = b.cid
+) c
+WHERE num = 1
+;
+-- Step 4: write the ${DS} partition of the main table, keeping for every id
+-- only the row with the most recent update_time. new_cids is a plain copy
+-- of cids here.
+INSERT OVERWRITE TABLE ${PROJECT}.inc_ads_${DIM_TABLE} PARTITION (ds = ${DS})
+SELECT
+    cids AS new_cids
+    ,cids
+    ,${DIM_COLUMS}
+FROM (
+    SELECT
+        cids AS new_cids
+        ,cids
+        ,${DIM_COLUMS}
+        ,ROW_NUMBER() OVER (
+            PARTITION BY id
+            ORDER BY update_time DESC
+        ) AS num
+    FROM ${PROJECT}.inc_ods_${DIM_TABLE}
+    -- Rows whose cids is NULL or blank are excluded.
+    WHERE ds >= ${DS}
+      AND cids IS NOT NULL
+      AND TRIM(cids) <> ''
+) a
+WHERE num = 1
+;

+ 2 - 2
src/main/java/com/winhc/dataworks/flow/touch/script/ads_simple_ods.sql

@@ -6,7 +6,7 @@
 --${PROJECT},空间名
 --${DIM_TABLE},维度表,如:company_icp,下面会在各个层次添加前缀,如:winhc_eci_dev.inc_ads_company_icp
 --${DS},当天分区
---${DIM_COLUMS},维度表的字段集
+--${DIM_COLUMS},维度表的字段集,不含cid、new_cid字段
 --${DUPLI_COLS},维度表的去重字段集
 --********************************************************************--
 
@@ -53,7 +53,7 @@ FROM    (
                     ,a.cid
                     ,${DIM_COLUMS}
                     ,ROW_NUMBER() OVER (PARTITION BY ${DUPLI_COLS} ORDER BY update_time DESC ) num
-            FROM    ${PROJECT}.inc_ods_${DIM_TABLE} a
+            FROM    ${PROJECT}.inc_tmp_${DIM_TABLE} a--此处与ads_simple_ods.sql不同,是从tmp层获取数据,记得该tmp表无new_cid
             LEFT JOIN MAPPING b
             ON      a.cid = b.cid
             WHERE   a.ds >= ${DS}

+ 193 - 0
src/main/java/com/winhc/dataworks/flow/touch/script/inc_company_land_mortgage_sql/tmp_company_land_mortgage_ods.sql

@@ -0,0 +1,193 @@
+--odps sql
+--********************************************************************--
+--author:yyn
+--create time:2020-06-28 13:53:15
+--土地抵押维度增量数据插入、复制方案
+--${PROJECT},空间名
+--${PROJECT_MAP},company_map空间名,测试用
+--${DS},当天分区
+--********************************************************************--
+
+-- Fill the ${DS} partition of inc_tmp_company_land_mortgage from the
+-- incremental ODS land-mortgage rows joined to company_map. Five branches,
+-- combined with UNION (duplicates removed), classify each row by which of
+-- mortgagor_cid / mortgagee_cid is present and tag it with a type literal.
+-- NOTE(review): the source filter is ds > ${DS} (strict) while the target
+-- partition is ds = ${DS} -- confirm the strict comparison is intended.
+-- NOTE(review): new_cid, cid and id are unqualified; which side of the join
+-- they resolve to depends on table schemas not visible here.
+INSERT OVERWRITE TABLE ${PROJECT}.inc_tmp_company_land_mortgage PARTITION (ds = ${DS})
+-- Branch 1 'mortgagee': only the mortgagee cid is present; joined on it.
+SELECT
+    CONCAT_WS("_",new_cid,id) AS rowkey
+    ,cid
+    ,'mortgagee' AS type
+    ,id, land_mark, land_num, land_aministrative_area, land_loc, land_area
+    ,other_item_num, use_right_num, mortgagor, mortgagee, nature
+    ,use_for, use_type, area, evaluate_amount, mortgage_amount, source_url
+    ,start_date, end_date, create_time, update_time, deleted
+FROM ${PROJECT}.inc_ods_company_land_mortgage AS A
+LEFT JOIN ${PROJECT_MAP}.company_map AS B
+    ON A.mortgagee_cid = B.cid
+WHERE A.ds > ${DS}
+  AND A.mortgagor_cid IS NULL
+  AND A.mortgagee_cid IS NOT NULL
+UNION
+-- Branch 2 'mortgagor': only the mortgagor cid is present; joined on it.
+SELECT
+    CONCAT_WS("_",new_cid,id) AS rowkey
+    ,cid
+    ,'mortgagor' AS type
+    ,id, land_mark, land_num, land_aministrative_area, land_loc, land_area
+    ,other_item_num, use_right_num, mortgagor, mortgagee, nature
+    ,use_for, use_type, area, evaluate_amount, mortgage_amount, source_url
+    ,start_date, end_date, create_time, update_time, deleted
+FROM ${PROJECT}.inc_ods_company_land_mortgage AS A
+LEFT JOIN ${PROJECT_MAP}.company_map AS B
+    ON A.mortgagor_cid = B.cid
+WHERE A.ds > ${DS}
+  AND A.mortgagor_cid IS NOT NULL
+  AND A.mortgagee_cid IS NULL
+UNION
+-- Branch 3 'bothsame': mortgagor and mortgagee carry the same cid.
+SELECT
+    CONCAT_WS("_",new_cid,id) AS rowkey
+    ,cid
+    ,'bothsame' AS type
+    ,id, land_mark, land_num, land_aministrative_area, land_loc, land_area
+    ,other_item_num, use_right_num, mortgagor, mortgagee, nature
+    ,use_for, use_type, area, evaluate_amount, mortgage_amount, source_url
+    ,start_date, end_date, create_time, update_time, deleted
+FROM ${PROJECT}.inc_ods_company_land_mortgage AS A
+LEFT JOIN ${PROJECT_MAP}.company_map AS B
+    ON A.mortgagor_cid = B.cid
+WHERE A.ds > ${DS}
+  AND A.mortgagor_cid IS NOT NULL
+  AND A.mortgagor_cid = A.mortgagee_cid
+UNION
+-- Branch 4 'bothone': both cids present and different; first of the two
+-- emitted rows, joined on the mortgagor cid.
+SELECT
+    CONCAT_WS("_",new_cid,id) AS rowkey
+    ,cid
+    ,'bothone' AS type
+    ,id, land_mark, land_num, land_aministrative_area, land_loc, land_area
+    ,other_item_num, use_right_num, mortgagor, mortgagee, nature
+    ,use_for, use_type, area, evaluate_amount, mortgage_amount, source_url
+    ,start_date, end_date, create_time, update_time, deleted
+FROM ${PROJECT}.inc_ods_company_land_mortgage AS A
+LEFT JOIN ${PROJECT_MAP}.company_map AS B
+    ON A.mortgagor_cid = B.cid
+WHERE A.ds > ${DS}
+  AND A.mortgagor_cid IS NOT NULL
+  AND A.mortgagee_cid IS NOT NULL
+  AND A.mortgagor_cid <> A.mortgagee_cid
+UNION
+-- Branch 5 'bothtwo': same rows as branch 4; second emitted row, joined on
+-- the mortgagee cid.
+SELECT
+    CONCAT_WS("_",new_cid,id) AS rowkey
+    ,cid
+    ,'bothtwo' AS type
+    ,id, land_mark, land_num, land_aministrative_area, land_loc, land_area
+    ,other_item_num, use_right_num, mortgagor, mortgagee, nature
+    ,use_for, use_type, area, evaluate_amount, mortgage_amount, source_url
+    ,start_date, end_date, create_time, update_time, deleted
+FROM ${PROJECT}.inc_ods_company_land_mortgage AS A
+LEFT JOIN ${PROJECT_MAP}.company_map AS B
+    ON A.mortgagee_cid = B.cid
+WHERE A.ds > ${DS}
+  AND A.mortgagor_cid IS NOT NULL
+  AND A.mortgagee_cid IS NOT NULL
+  AND A.mortgagor_cid <> A.mortgagee_cid
+;