task-step02.yaml 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
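  1. # Company basic information, bidding, environmental penalties, land purchase information, land parcel publicity, administrative penalties (Credit China), administrative penalties, land mortgage, land transfer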
  2. job:
  3. #------< Company basic information: flow inc_company_spark, single task company_inc
  4. - project: winhc_test
  5. flow: inc_company_spark
  6. task:
  7. - taskName: company_inc
  8. param:  # one param set; its project (winhc_eci_dev) differs from the entry-level project (winhc_test)
  9. - _nodeId: 700003381602
  10. project: winhc_eci_dev
  11. #------> end: company basic information
  12. # Recruitment (company_employment); this entry also lists company_finance, company_illegal_info and company_change
  13. - project: winhc_test
  14. flow: incr_calc_intellectual
  15. task:  # four tasks; each has two param sets (nodes 700003375026 and 700003380225) with the same tableName and dupliCols
  16. - taskName: company_finance
  17. param:
  18. - _nodeId: 700003375026
  19. project: winhc_eci_dev
  20. tableName: company_finance
  21. dupliCols: new_cid,round,money
  22. flag: cid
  23. - _nodeId: 700003380225
  24. project: winhc_eci_dev
  25. tableName: company_finance
  26. cidField: new_cid
  27. dupliCols: new_cid,round,money
  28. - taskName: company_illegal_info
  29. param:
  30. - _nodeId: 700003375026
  31. project: winhc_eci_dev
  32. tableName: company_illegal_info
  33. dupliCols: new_cid,put_reason,put_date,put_department
  34. flag: cid
  35. - _nodeId: 700003380225
  36. project: winhc_eci_dev
  37. tableName: company_illegal_info
  38. cidField: new_cid
  39. dupliCols: new_cid,put_reason,put_date,put_department
  40. - taskName: company_change
  41. param:
  42. - _nodeId: 700003375026
  43. project: winhc_eci_dev
  44. tableName: company_change
  45. dupliCols: new_cid,change_item,change_time
  46. flag: cid
  47. - _nodeId: 700003380225
  48. project: winhc_eci_dev
  49. tableName: company_change
  50. cidField: new_cid
  51. dupliCols: new_cid,change_item,change_time
  52. - taskName: company_employment
  53. param:
  54. - _nodeId: 700003375026
  55. project: winhc_eci_dev
  56. tableName: company_employment
  57. dupliCols: title,new_cid,url_path
  58. flag: cid
  59. - _nodeId: 700003380225
  60. project: winhc_eci_dev
  61. tableName: company_employment
  62. cidField: new_cid
  63. dupliCols: title,new_cid,url_path
  64. #------< Bidding: Spark
  65. - project: winhc_test
  66. flow: incr_calc_intellectual
  67. task:
  68. - taskName: company_bid
  69. param:
  70. - _nodeId: 700003375026
  71. project: winhc_eci_dev
  72. tableName: company_bid
  73. dupliCols: new_cid,title,link,publish_time
  74. flag: cids  # cids (plural) here; the second param set below targets company_bid_list
  75. - _nodeId: 700003380225
  76. project: winhc_eci_dev
  77. tableName: company_bid_list
  78. cidField: new_cid
  79. dupliCols: new_cid,title,link,publish_time
  80. #------> end: bidding (no new "- project" entry follows, so the next tasks appear to stay in this flow's task list)
  81. #------< Environmental penalties: task company_env_punishment, two param sets with the same tableName and dupliCols
  82. - taskName: company_env_punishment
  83. param:
  84. - _nodeId: 700003375026
  85. project: winhc_eci_dev
  86. tableName: company_env_punishment
  87. dupliCols: new_cid,name,source_url,punish_number
  88. flag: cid
  89. - _nodeId: 700003380225
  90. project: winhc_eci_dev
  91. tableName: company_env_punishment  # was company_land_publicity (copy-paste from the land-publicity task)
  92. cidField: new_cid
  93. dupliCols: new_cid,name,source_url,punish_number  # now matches the first param set of this task
  94. #------> end: environmental penalties
  95. #------< Land purchase information: Spark -- task company_land_announcement, two param sets with the same tableName and dupliCols
  96. - taskName: company_land_announcement
  97. param:
  98. - _nodeId: 700003375026
  99. project: winhc_eci_dev
  100. tableName: company_land_announcement
  101. dupliCols: new_cid,source_url,e_number,project_name
  102. flag: cid
  103. - _nodeId: 700003380225
  104. project: winhc_eci_dev
  105. tableName: company_land_announcement
  106. cidField: new_cid
  107. dupliCols: new_cid,source_url,e_number,project_name
  108. #------> end: land purchase information
  109. #------< Land parcel publicity: Spark -- task company_land_publicity, two param sets with the same tableName and dupliCols
  110. - taskName: company_land_publicity
  111. param:
  112. - _nodeId: 700003375026
  113. project: winhc_eci_dev
  114. tableName: company_land_publicity
  115. dupliCols: new_cid,title,project_name,source_url
  116. flag: cid
  117. - _nodeId: 700003380225
  118. project: winhc_eci_dev
  119. tableName: company_land_publicity
  120. cidField: new_cid
  121. dupliCols: new_cid,title,project_name,source_url
  122. #------> end: land parcel publicity
  123. #------< Administrative penalties (Credit China): Spark -- task company_punishment_info_creditchina, two param sets with the same tableName and dupliCols
  124. - taskName: company_punishment_info_creditchina
  125. param:
  126. - _nodeId: 700003375026
  127. project: winhc_eci_dev
  128. tableName: company_punishment_info_creditchina
  129. dupliCols: new_cid,company_name,source,punish_number
  130. flag: cid
  131. - _nodeId: 700003380225
  132. project: winhc_eci_dev
  133. tableName: company_punishment_info_creditchina
  134. cidField: new_cid
  135. dupliCols: new_cid,company_name,source,punish_number
  136. #------> end: administrative penalties (Credit China)
  137. #------< Administrative penalties: Spark -- task company_punishment_info, two param sets with the same tableName and dupliCols
  138. - taskName: company_punishment_info
  139. param:
  140. - _nodeId: 700003375026
  141. project: winhc_eci_dev
  142. tableName: company_punishment_info
  143. dupliCols: new_cid,name,source,desc_file_path,punish_number
  144. flag: cid
  145. - _nodeId: 700003380225
  146. project: winhc_eci_dev
  147. tableName: company_punishment_info
  148. cidField: new_cid
  149. dupliCols: new_cid,name,source,desc_file_path,punish_number
  150. #------> end: administrative penalties
  151. #------< Land mortgage: ODPS SQL -- flow inc_company_land_mortgage_sql, one task with three param sets (upper-case keys)
  152. - project: winhc_test
  153. flow: inc_company_land_mortgage_sql
  154. task:
  155. - taskName: company_land_mortgage
  156. param:
  157. - _nodeId: 700003375909
  158. PROJECT: winhc_eci_dev
  159. # DIM_TABLE: company_land_mortgage
  160. - _nodeId: 700003375910
  161. PROJECT: winhc_eci_dev
  162. DIM_TABLE: company_land_mortgage  # NOTE(review): the next key is spelled DIM_COLUMS (not DIM_COLUMNS); left as-is, the consumer may read this exact name -- confirm
  163. DIM_COLUMS: type,id,land_mark,land_num,land_aministrative_area,land_loc,land_area,other_item_num,use_right_num,mortgagor,mortgagee,nature,use_for,use_type,area,evaluate_amount,mortgage_amount,source_url,start_date,end_date,create_time,update_time,deleted
  164. DUPLI_COLS: new_cid,type,source_url,land_mark,land_num
  165. MD5_COLS: type,land_num,land_mark,source_url
  166. - _nodeId: 700003422526
  167. PROJECT: winhc_eci_dev
  168. DIM_TABLE: company_land_mortgage
  169. #------> end: land mortgage
  170. #------< Land transfer: ODPS SQL -- flow inc_company_land_transfer_sql, one task with three param sets (upper-case keys)
  171. - project: winhc_test
  172. flow: inc_company_land_transfer_sql
  173. task:
  174. - taskName: company_land_transfer
  175. param:
  176. - _nodeId: 700003377079
  177. PROJECT: winhc_eci_dev
  178. # DIM_TABLE: company_land_transfer
  179. DS: 20200717  # NOTE(review): fixed value that looks like a date (2020-07-17); the land-mortgage entry sets no DS -- confirm this is intended
  180. - _nodeId: 700003377080
  181. PROJECT: winhc_eci_dev
  182. DIM_TABLE: company_land_transfer  # NOTE(review): the next key is spelled DIM_COLUMS (not DIM_COLUMNS); left as-is, the consumer may read this exact name -- confirm
  183. DIM_COLUMS: type,id,mark,num,location,aministrative_area,user_pre,user_now,area,use_for,use_type,years_of_use,situation,level,merchandise_type,merchandise_price,merchandise_time,url,create_time,update_time,deleted
  184. DUPLI_COLS: new_cid,type,num,location,url
  185. MD5_COLS: type,num,location,url
  186. DS: 20200717  # same fixed value as in the first param set; the third param set has no DS
  187. - _nodeId: 700003421692
  188. PROJECT: winhc_eci_dev
  189. DIM_TABLE: company_land_transfer
  190. #------> end: land transfer
  191. #------< Public summons notices: Spark -- flow incr_calc_intellectual, task company_public_announcement2
  192. - project: winhc_test
  193. flow: incr_calc_intellectual
  194. task:
  195. - taskName: company_public_announcement2
  196. param:
  197. - _nodeId: 700003375026
  198. project: winhc_eci_dev
  199. tableName: company_public_announcement2
  200. dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
  201. flag: cids  # cids (plural) here; the second param set below targets company_public_announcement2_list
  202. - _nodeId: 700003380225
  203. project: winhc_eci_dev
  204. tableName: company_public_announcement2_list
  205. cidField: new_cid
  206. dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
  207. #------> end: public summons notices
  208. #------< Chattel mortgage - mortgage information: Spark -- flow incr_calc_intellectual_without_md5, single param set (node 700003483308)
  209. - project: winhc_test
  210. flow: incr_calc_intellectual_without_md5
  211. task:
  212. - taskName: company_mortgage_info
  213. param:
  214. - _nodeId: 700003483308
  215. project: winhc_eci_dev
  216. tableName: company_mortgage_info
  217. dupliCols: new_cid,id
  218. flag: cid
  219. #------> end: chattel mortgage - mortgage information
  220. #------< Chattel mortgage - mortgagor: Spark -- flow incr_calc_intellectual_without_md5, single param set (node 700003483308)
  221. - project: winhc_test
  222. flow: incr_calc_intellectual_without_md5
  223. task:
  224. - taskName: company_mortgage_people
  225. param:
  226. - _nodeId: 700003483308
  227. project: winhc_eci_dev
  228. tableName: company_mortgage_people
  229. dupliCols: new_cid,main_id,id
  230. flag: cid
  231. #------> end: chattel mortgage - mortgagor
  232. #------< Chattel mortgage - collateral: Spark -- flow incr_calc_intellectual_without_md5, single param set (node 700003483308)
  233. - project: winhc_test
  234. flow: incr_calc_intellectual_without_md5
  235. task:
  236. - taskName: company_mortgage_pawn
  237. param:
  238. - _nodeId: 700003483308
  239. project: winhc_eci_dev
  240. tableName: company_mortgage_pawn
  241. dupliCols: new_cid,main_id,id
  242. flag: cid
  243. #------> end: chattel mortgage - collateral
  244. - project: winhc_test  # entry for flow inc_company_mapping: one task with one param set
  245. flow: inc_company_mapping
  246. task:
  247. # run-1
  248. - taskName: inc_company_mapping
  249. param:
  250. - _nodeId: 700003457354
  251. project: winhc_eci_dev
  252. - project: winhc_test  # entry for flow inc_company_equity_info: one task with one param set
  253. flow: inc_company_equity_info
  254. task:
  255. # run-1
  256. - taskName: inc_company_equity_info
  257. param:
  258. - _nodeId: 700003452779
  259. project: winhc_eci_dev
  260. - project: winhc_test  # entry for flow incr_calc_intellectual; its first task is company_own_tax
  261. flow: incr_calc_intellectual
  262. task:
  263. - taskName: company_own_tax  # two param sets with the same tableName and dupliCols
  264. param:
  265. - _nodeId: 700003375026
  266. project: winhc_eci_dev
  267. tableName: company_own_tax
  268. dupliCols: new_cid,tax_balance,tax_category,tax_num
  269. flag: cid
  270. - _nodeId: 700003380225
  271. project: winhc_eci_dev
  272. tableName: company_own_tax
  273. cidField: new_cid
  274. dupliCols: new_cid,tax_balance,tax_category,tax_num
  275. # run-1 -- task company_certificate: two param sets with the same tableName and dupliCols
  276. - taskName: company_certificate
  277. param:
  278. - _nodeId: 700003375026
  279. project: winhc_eci_dev
  280. tableName: company_certificate
  281. dupliCols: new_cid,start_date,end_date,cert_no,type
  282. flag: cid
  283. - _nodeId: 700003380225
  284. project: winhc_eci_dev
  285. tableName: company_certificate
  286. cidField: new_cid
  287. dupliCols: new_cid,start_date,end_date,cert_no,type
  288. # run-1 -- task company_abnormal_info: two param sets with the same tableName and dupliCols
  289. - taskName: company_abnormal_info
  290. param:
  291. - _nodeId: 700003375026
  292. project: winhc_eci_dev
  293. tableName: company_abnormal_info
  294. dupliCols: new_cid,put_reason,put_date
  295. flag: cid
  296. - _nodeId: 700003380225
  297. project: winhc_eci_dev
  298. tableName: company_abnormal_info
  299. cidField: new_cid
  300. dupliCols: new_cid,put_reason,put_date
  301. # run-1 -- task company_icp: two param sets with the same tableName and dupliCols
  302. - taskName: company_icp
  303. param:
  304. - _nodeId: 700003375026
  305. project: winhc_eci_dev
  306. tableName: company_icp
  307. dupliCols: new_cid,liscense,domain  # NOTE(review): column is spelled "liscense"; left untouched, presumably matches the table schema -- confirm
  308. flag: cid
  309. - _nodeId: 700003380225
  310. project: winhc_eci_dev
  311. tableName: company_icp
  312. cidField: new_cid
  313. dupliCols: new_cid,liscense,domain
  314. # run-1 -- task company_app_info: two param sets with the same tableName and dupliCols
  315. - taskName: company_app_info
  316. param:
  317. - _nodeId: 700003375026
  318. project: winhc_eci_dev
  319. tableName: company_app_info
  320. dupliCols: new_cid,name
  321. flag: cid
  322. - _nodeId: 700003380225
  323. project: winhc_eci_dev
  324. tableName: company_app_info
  325. cidField: new_cid
  326. dupliCols: new_cid,name
  327. # run-1 -- task company_copyright_reg: flag is cids and the second param set targets company_copyright_reg_list
  328. - taskName: company_copyright_reg
  329. param:
  330. - _nodeId: 700003375026
  331. project: winhc_eci_dev
  332. tableName: company_copyright_reg
  333. dupliCols: new_cid,reg_num
  334. flag: cids
  335. - _nodeId: 700003380225
  336. project: winhc_eci_dev
  337. tableName: company_copyright_reg_list
  338. cidField: new_cid
  339. dupliCols: new_cid,reg_num
  340. # run-1 -- task company_wechat: two param sets with the same tableName and dupliCols
  341. - taskName: company_wechat
  342. param:
  343. - _nodeId: 700003375026
  344. project: winhc_eci_dev
  345. tableName: company_wechat
  346. dupliCols: new_cid,public_num
  347. flag: cid
  348. - _nodeId: 700003380225
  349. project: winhc_eci_dev
  350. tableName: company_wechat
  351. cidField: new_cid
  352. dupliCols: new_cid,public_num
  353. # run-1 -- task company_tm: two param sets with the same tableName and dupliCols
  354. - taskName: company_tm
  355. param:
  356. - _nodeId: 700003375026
  357. project: winhc_eci_dev
  358. tableName: company_tm
  359. dupliCols: new_cid,reg_no
  360. flag: cid
  361. - _nodeId: 700003380225
  362. project: winhc_eci_dev
  363. tableName: company_tm
  364. cidField: new_cid
  365. dupliCols: new_cid,reg_no
  366. # run-1 -- task company_patent: flag is cids and the second param set targets company_patent_list
  367. - taskName: company_patent
  368. param:
  369. - _nodeId: 700003375026
  370. project: winhc_eci_dev
  371. tableName: company_patent
  372. dupliCols: new_cid,pub_number,app_number
  373. flag: cids
  374. - _nodeId: 700003380225
  375. project: winhc_eci_dev
  376. tableName: company_patent_list
  377. cidField: new_cid
  378. dupliCols: new_cid,pub_number,app_number
  379. # run-1 -- task company_copyright_works: flag is cids and the second param set targets company_copyright_works_list
  380. - taskName: company_copyright_works
  381. param:
  382. - _nodeId: 700003375026
  383. project: winhc_eci_dev
  384. tableName: company_copyright_works
  385. dupliCols: new_cid,reg_num
  386. flag: cids
  387. - _nodeId: 700003380225
  388. project: winhc_eci_dev
  389. tableName: company_copyright_works_list
  390. cidField: new_cid
  391. dupliCols: new_cid,reg_num