---
# Jobs in this file include: company basic information, bidding,
# environmental penalties, land purchase information, land parcel publicity,
# administrative penalties (Credit China), administrative penalties,
# land mortgage and land transfer, among others defined further down.
job:
  # ------< Company basic information
  - project: winhc_test
    flow: inc_company_spark
    task:
      - taskName: company_inc
        param:
          - _nodeId: 700003381602
            project: winhc_eci_dev
  # ------>
  # Company changes (company_change) and recruitment (company_employment)
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_change
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_change
            dupliCols: new_cid,change_item,change_time
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_change
            cidField: new_cid
            dupliCols: new_cid,change_item,change_time
      - taskName: company_employment
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_employment
            dupliCols: title,new_cid,url_path
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_employment
            cidField: new_cid
            dupliCols: title,new_cid,url_path
  # ------< Bidding: Spark
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_bid
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_bid
            dupliCols: new_cid,title,link,publish_time
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_bid_list
            cidField: new_cid
            dupliCols: new_cid,title,link,publish_time
      # ------>
      # ------< Environmental penalties
      - taskName: company_env_punishment
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_env_punishment
            dupliCols: new_cid,name,source_url,punish_number
            flag: cid
          # NOTE(review): this step previously pointed at company_land_publicity
          # with that table's dupliCols, which looked like a copy-paste slip;
          # aligned with the first step as every other `flag: cid` task does.
          # Confirm against the node's expected input.
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_env_punishment
            cidField: new_cid
            dupliCols: new_cid,name,source_url,punish_number
      # ------>
      # ------< Land purchase information: Spark
      - taskName: company_land_announcement
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_land_announcement
            dupliCols: new_cid,source_url,e_number,project_name
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_land_announcement
            cidField: new_cid
            dupliCols: new_cid,source_url,e_number,project_name
      # ------>
      # ------< Land parcel publicity: Spark
      - taskName: company_land_publicity
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_land_publicity
            dupliCols: new_cid,title,project_name,source_url
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_land_publicity
            cidField: new_cid
            dupliCols: new_cid,title,project_name,source_url
      # ------>
      # ------< Administrative penalties (Credit China): Spark
      - taskName: company_punishment_info_creditchina
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_punishment_info_creditchina
            dupliCols: new_cid,company_name,source,punish_number
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_punishment_info_creditchina
            cidField: new_cid
            dupliCols: new_cid,company_name,source,punish_number
      # ------>
      # ------< Administrative penalties: Spark
      - taskName: company_punishment_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_punishment_info
            dupliCols: new_cid,name,source,desc_file_path,punish_number
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_punishment_info
            cidField: new_cid
            dupliCols: new_cid,name,source,desc_file_path,punish_number
  # ------>
  # ------< Land mortgage: ODPS SQL
  - project: winhc_test
    flow: inc_company_land_mortgage_sql
    task:
      - taskName: company_land_mortgage
        param:
          - _nodeId: 700003375909
            PROJECT: winhc_eci_dev
            # DIM_TABLE: company_land_mortgage
          - _nodeId: 700003375910
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_mortgage
            DIM_COLUMS: type,id,land_mark,land_num,land_aministrative_area,land_loc,land_area,other_item_num,use_right_num,mortgagor,mortgagee,nature,use_for,use_type,area,evaluate_amount,mortgage_amount,source_url,start_date,end_date,create_time,update_time,deleted
            DUPLI_COLS: new_cid,type,source_url,land_mark,land_num
            MD5_COLS: type,land_num,land_mark,source_url
          - _nodeId: 700003422526
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_mortgage
  # ------>
  # ------< Land transfer: ODPS SQL
  - project: winhc_test
    flow: inc_company_land_transfer_sql
    task:
      - taskName: company_land_transfer
        param:
          # NOTE(review): DS is pinned to a fixed value on the first two steps,
          # unlike the land-mortgage job which sets no DS — confirm this is
          # intended and not a leftover from a one-off run.
          - _nodeId: 700003377079
            PROJECT: winhc_eci_dev
            # DIM_TABLE: company_land_transfer
            DS: 20200717
          - _nodeId: 700003377080
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_transfer
            DIM_COLUMS: type,id,mark,num,location,aministrative_area,user_pre,user_now,area,use_for,use_type,years_of_use,situation,level,merchandise_type,merchandise_price,merchandise_time,url,create_time,update_time,deleted
            DUPLI_COLS: new_cid,type,num,location,url
            MD5_COLS: type,num,location,url
            DS: 20200717
          - _nodeId: 700003421692
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_transfer
  # ------>
  # ------< Public announcements (public summons): Spark
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_public_announcement2
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_public_announcement2
            dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_public_announcement2_list
            cidField: new_cid
            dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
  # ------>
  # ------< Chattel mortgage - mortgage information: Spark
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_info
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_info
            dupliCols: new_cid,id
            flag: cid
  # ------>
  # ------< Chattel mortgage - mortgagor: Spark
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_people
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_people
            dupliCols: new_cid,main_id,id
            flag: cid
  # ------>
  # ------< Chattel mortgage - collateral: Spark
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_pawn
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_pawn
            dupliCols: new_cid,main_id,id
            flag: cid
  # ------>
  - project: winhc_test
    flow: inc_company_mapping
    task:
      # run-1
      - taskName: inc_company_mapping
        param:
          - _nodeId: 700003457354
            project: winhc_eci_dev
  - project: winhc_test
    flow: inc_company_equity_info
    task:
      # run-1
      - taskName: inc_company_equity_info
        param:
          - _nodeId: 700003452779
            project: winhc_eci_dev
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_own_tax
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_own_tax
            dupliCols: new_cid,tax_balance,tax_category,tax_num
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_own_tax
            cidField: new_cid
            dupliCols: new_cid,tax_balance,tax_category,tax_num
      # run-1
      - taskName: company_certificate
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_certificate
            dupliCols: new_cid,start_date,end_date,cert_no,type
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_certificate
            cidField: new_cid
            dupliCols: new_cid,start_date,end_date,cert_no,type
      # run-1
      - taskName: company_abnormal_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_abnormal_info
            dupliCols: new_cid,put_reason,put_date
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_abnormal_info
            cidField: new_cid
            dupliCols: new_cid,put_reason,put_date
      # run-1
      - taskName: company_icp
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_icp
            dupliCols: new_cid,liscense,domain
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_icp
            cidField: new_cid
            dupliCols: new_cid,liscense,domain
      # run-1
      - taskName: company_app_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_app_info
            dupliCols: new_cid,name
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_app_info
            cidField: new_cid
            dupliCols: new_cid,name
      # run -1
      - taskName: company_copyright_reg
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_copyright_reg
            dupliCols: new_cid,reg_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_copyright_reg_list
            cidField: new_cid
            dupliCols: new_cid,reg_num
      # run-1
      - taskName: company_wechat
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_wechat
            dupliCols: new_cid,public_num
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_wechat
            cidField: new_cid
            dupliCols: new_cid,public_num
      # run -1
      - taskName: company_tm
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_tm
            dupliCols: new_cid,reg_no
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_tm
            cidField: new_cid
            dupliCols: new_cid,reg_no
      # run -1
      - taskName: company_patent
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_patent
            dupliCols: new_cid,pub_number,app_number
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_patent_list
            cidField: new_cid
            dupliCols: new_cid,pub_number,app_number
      # run-1
      - taskName: company_copyright_works
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_copyright_works
            dupliCols: new_cid,reg_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_copyright_works_list
            cidField: new_cid
            dupliCols: new_cid,reg_num