Browse Source

feat: 添加天眼查数据对比

许家凯 cách đây 2 năm
mục cha
commit
ffdda4d5a8
1 tập tin đã thay đổi với 112 bổ sung và 71 xóa
  1. 112 71
      src/main/scala/com/winhc/bigdata/spark/ng/jobs/V7DataCompare.scala

+ 112 - 71
src/main/scala/com/winhc/bigdata/spark/ng/jobs/V7DataCompare.scala

@@ -96,59 +96,62 @@ case class V7DataCompare(s: SparkSession
       sql(
         s"""
            |INSERT OVERWRITE TABLE winhc_ng.tmp_cid_company_id_mapping PARTITION (ds = '${BaseUtil.getYesterday()}')
-           |select  company_id
+           |SELECT  company_id
            |        ,cid
            |        ,name
-           |from (
-           |select *
-           |       ,ROW_NUMBER() OVER(PARTITION BY company_id,cid ORDER BY company_id DESC) AS xjk_num
-           |from (
-           |SELECT  t1.company_id
-           |        ,t2.cid
-           |        ,t1.name
-           |FROM    (
-           |         SELECT  company_id
-           |                 ,reg_number
-           |                 ,name
-           |         FROM    (
-           |                    ${company_tuple._1}
-           |                 )
-           |         where reg_number is not null and reg_number <> '' and trim(reg_number)<>''
-           |        ) AS t1
-           |join (
-           |         SELECT  cid
-           |                 ,reg_number
-           |         FROM    (
-           |                 ${v7_company_tuple._1}
-           |                 )
-           |         where reg_number is not null and reg_number <> '' and trim(reg_number)<>''
-           |     ) AS t2
-           |ON      t1.reg_number = t2.reg_number
-           |UNION ALL
-           |SELECT  t1.company_id
-           |        ,t2.cid
-           |        ,t1.name
            |FROM    (
-           |         SELECT  company_id
-           |                 ,credit_code
-           |                 ,name
-           |         FROM    (
-           |                    ${company_tuple._1}
-           |                 )
-           |         where credit_code is not null and credit_code <> '' and trim(credit_code)<>''
-           |        ) AS t1
-           |join (
-           |         SELECT  cid
-           |                 ,credit_code
-           |         FROM    (
-           |                 ${v7_company_tuple._1}
-           |                 )
-           |         where credit_code is not null and credit_code <> '' and trim(credit_code)<>''
-           |     ) AS t2
-           |ON      t1.credit_code = t2.credit_code
-           |)
-           |)
-           |where xjk_num = 1
+           |        SELECT  * ,ROW_NUMBER()OVER (PARTITION BY company_id ,cid ORDER BY company_id DESC ) AS xjk_num
+           |        FROM    (
+           |                SELECT  t1.company_id
+           |                        ,t2.cid
+           |                        ,t1.name
+           |                FROM    (
+           |                        SELECT  company_id
+           |                                ,reg_number
+           |                                ,name
+           |                        FROM    (
+           |                                ${company_tuple._1}
+           |                                )
+           |                        WHERE   reg_number IS NOT NULL
+           |                        AND     reg_number <> '' AND trim(reg_number) <> ''
+           |                        ) AS t1
+           |                join (
+           |                        SELECT  cid
+           |                                ,reg_number
+           |                        FROM    (
+           |                                ${v7_company_tuple._1}
+           |                                )
+           |                        WHERE   reg_number IS NOT NULL
+           |                        AND     reg_number <> '' AND trim(reg_number) <> ''
+           |                     ) AS t2
+           |                ON      t1.reg_number = t2.reg_number
+           |                UNION ALL
+           |                SELECT  t1.company_id
+           |                        ,t2.cid
+           |                        ,t1.name
+           |                FROM    (
+           |                        SELECT  company_id
+           |                                ,credit_code
+           |                                ,name
+           |                        FROM    (
+           |                                ${company_tuple._1}
+           |                                )
+           |                        WHERE   credit_code IS NOT NULL
+           |                        AND     credit_code <> '' AND trim(credit_code) <> ''
+           |                        ) AS t1
+           |                join (
+           |                        SELECT  cid
+           |                                ,credit_code
+           |                        FROM    (
+           |                                ${v7_company_tuple._1}
+           |                                )
+           |                        WHERE   credit_code IS NOT NULL
+           |                        AND     credit_code <> '' AND trim(credit_code) <> ''
+           |                     ) AS t2
+           |                ON      t1.credit_code = t2.credit_code
+           |                )
+           |        )
+           |WHERE   xjk_num =1
            |""".stripMargin)
     }
 
@@ -182,37 +185,58 @@ case class V7DataCompare(s: SparkSession
       case "company" => {
         s"""
            |
-           |select gt1.*,gt2.name as legal_entity_name from (
-           |       select * from winhc_eci_dev.inc_ods_$tn where ds = '${BaseUtil.getYesterday()}'
+           |select gt1.*
+           |       ,gt2.name as legal_entity_name
+           |from (
+           |       select *
+           |       from   winhc_eci_dev.inc_ods_$tn
+           |       where  ds = '${BaseUtil.getYesterday()}'
            |) as gt1
            |left join (
-           |       select * from winhc_ng.tmp_hid_name_mapping where ds = '${BaseUtil.getYesterday()}'
+           |       select *
+           |       from   winhc_ng.tmp_hid_name_mapping
+           |       where  ds = '${BaseUtil.getYesterday()}'
            |) as gt2
-           |on gt1.legal_entity_id = gt2.id and gt1.legal_entity_type = gt2.entity_type
+           |on     gt1.legal_entity_id = gt2.id
+           |and    gt1.legal_entity_type = gt2.entity_type
            |""".stripMargin
       }
       case "company_holder" => {
         s"""
            |
-           |select gt1.*,gt2.name as holder_name from (
-           |       select * from winhc_eci_dev.inc_ods_$tn where ds = '${BaseUtil.getYesterday()}'
+           |select gt1.*
+           |       ,gt2.name as holder_name
+           |from (
+           |       select *
+           |       from   winhc_eci_dev.inc_ods_$tn
+           |       where  ds = '${BaseUtil.getYesterday()}'
            |) as gt1
            |left join (
-           |       select * from winhc_ng.tmp_hid_name_mapping where ds = '${BaseUtil.getYesterday()}'
+           |       select *
+           |       from   winhc_ng.tmp_hid_name_mapping
+           |       where  ds = '${BaseUtil.getYesterday()}'
            |) as gt2
-           |on gt1.holder_id = gt2.id and gt1.holder_type = gt2.entity_type
+           |on     gt1.holder_id = gt2.id
+           |and    gt1.holder_type = gt2.entity_type
            |""".stripMargin
       }
       case "company_staff" => {
         s"""
            |
-           |select gt1.*,gt2.name as staff_name from (
-           |       select * from winhc_eci_dev.inc_ods_$tn where ds = '${BaseUtil.getYesterday()}'
+           |select gt1.*
+           |       ,gt2.name as staff_name
+           |from (
+           |       select *
+           |       from   winhc_eci_dev.inc_ods_$tn
+           |       where  ds = '${BaseUtil.getYesterday()}'
            |) as gt1
            |left join (
-           |       select * from winhc_ng.tmp_hid_name_mapping where ds = '${BaseUtil.getYesterday()}' and entity_type = 1
+           |       select *
+           |       from   winhc_ng.tmp_hid_name_mapping
+           |       where  ds = '${BaseUtil.getYesterday()}'
+           |       and    entity_type = 1
            |) as gt2
-           |on gt1.hid = gt2.id
+           |on     gt1.hid = gt2.id
            |""".stripMargin
       }
       case _ => {
@@ -233,22 +257,32 @@ case class V7DataCompare(s: SparkSession
          |       ,t1.cid
          |       ,t1.cols_md5 as cid_cols_md5
          |from (
-         |    select company_id,cid,$cols_md5 as cols_md5 from (
-         |         select tt1.*,tt2.company_id from (
-         |                 ${get_inc_ods(tn)}
+         |    select company_id
+         |           ,cid
+         |           ,$cols_md5 as cols_md5
+         |    from (
+         |         select tt1.*
+         |                ,tt2.company_id
+         |         from (
+         |                ${get_inc_ods(tn)}
          |         ) as tt1
          |         join (
-         |              select * from winhc_ng.tmp_cid_company_id_mapping where ds = '${BaseUtil.getYesterday()}'
+         |              select *
+         |              from   winhc_ng.tmp_cid_company_id_mapping
+         |              where  ds = '${BaseUtil.getYesterday()}'
          |         ) as tt2
          |         on tt1.cid = tt2.cid
          |    )
          |) as t1
          |left anti join (
-         |    select company_id,$cols_md5 as cols_md5 from (
+         |    select company_id
+         |           ,$cols_md5 as cols_md5
+         |    from (
          |        ${tuple._1}
          |    )
          |) as t2
-         |on t1.company_id = t2.company_id and t1.cols_md5 = t2.cols_md5
+         |on  t1.company_id = t2.company_id
+         |and t1.cols_md5 = t2.cols_md5
          |""".stripMargin)
   }
 
@@ -256,7 +290,9 @@ case class V7DataCompare(s: SparkSession
   def get_ods_cid(tn: String): DataFrame = {
     sql(
       s"""
-         |select DISTINCT cid from winhc_eci_dev.inc_ods_$tn where ds = '${BaseUtil.getYesterday()}'
+         |select DISTINCT cid
+         |from   winhc_eci_dev.inc_ods_$tn
+         |where  ds = '${BaseUtil.getYesterday()}'
          |""".stripMargin)
   }
 
@@ -315,9 +351,14 @@ case class V7DataCompare(s: SparkSession
          |select null as company_id
          |       ,t1.cid
          |       ,null as cid_cols_md5
-         |from (select DISTINCT cid from tmp_out_cid_df) as t1
+         |from (
+         |     select DISTINCT cid
+         |     from   tmp_out_cid_df
+         |) as t1
          |left anti join (
-         |       select * from winhc_ng.tmp_cid_company_id_mapping where ds = '${BaseUtil.getYesterday()}'
+         |    select *
+         |    from   winhc_ng.tmp_cid_company_id_mapping
+         |    where  ds = '${BaseUtil.getYesterday()}'
          |) as t2
          |on t1.cid = t2.cid
          |""".stripMargin)