# task-step02.yaml
  # Company basic info, bidding, environmental penalties, land purchases,
  # land parcel publicity, administrative penalties (Credit China),
  # administrative penalties, land mortgage, land transfer.
  job:
  # ------< Company basic info
  # Flow inc_company_spark: one task (company_inc) with a single parameter set.
  - project: winhc_test
    flow: inc_company_spark
    task:
      - taskName: company_inc
        param:
          - _nodeId: 700003381602
            project: winhc_eci_dev
  # ------>
  # Originally labelled "Recruitment"; the flow entry actually holds three
  # tasks: company_illegal_info, company_change and company_employment.
  # Each task supplies two parameter sets, one per node id.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_illegal_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_illegal_info
            dupliCols: new_cid,put_reason,put_date,put_department
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_illegal_info
            cidField: new_cid
            dupliCols: new_cid,put_reason,put_date,put_department
      - taskName: company_change
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_change
            dupliCols: new_cid,change_item,change_time
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_change
            cidField: new_cid
            dupliCols: new_cid,change_item,change_time
      - taskName: company_employment
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_employment
            dupliCols: title,new_cid,url_path
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_employment
            cidField: new_cid
            dupliCols: title,new_cid,url_path
  # ------< Bidding: Spark
  # One incr_calc_intellectual flow entry. The section markers below separate
  # its tasks; they do not start new flow entries. Each task supplies two
  # parameter sets, one per node id. In this file, tasks with `flag: cid`
  # repeat their own table name in the second set, while tasks with
  # `flag: cids` point the second set at the `<table>_list` table.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_bid
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_bid
            dupliCols: new_cid,title,link,publish_time
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_bid_list
            cidField: new_cid
            dupliCols: new_cid,title,link,publish_time
      # ------>
      # ------< Environmental penalties
      - taskName: company_env_punishment
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_env_punishment
            dupliCols: new_cid,name,source_url,punish_number
            flag: cid
          # Fixed: this set previously carried the table name and dupliCols of
          # company_land_publicity (copy-paste slip). It now matches the first
          # set, as every other `flag: cid` task in this file does.
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_env_punishment
            cidField: new_cid
            dupliCols: new_cid,name,source_url,punish_number
      # ------>
      # ------< Land purchase info: Spark
      - taskName: company_land_announcement
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_land_announcement
            dupliCols: new_cid,source_url,e_number,project_name
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_land_announcement
            cidField: new_cid
            dupliCols: new_cid,source_url,e_number,project_name
      # ------>
      # ------< Land parcel publicity: Spark
      - taskName: company_land_publicity
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_land_publicity
            dupliCols: new_cid,title,project_name,source_url
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_land_publicity
            cidField: new_cid
            dupliCols: new_cid,title,project_name,source_url
      # ------>
      # ------< Administrative penalties (Credit China): Spark
      - taskName: company_punishment_info_creditchina
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_punishment_info_creditchina
            dupliCols: new_cid,company_name,source,punish_number
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_punishment_info_creditchina
            cidField: new_cid
            dupliCols: new_cid,company_name,source,punish_number
      # ------>
      # ------< Administrative penalties: Spark
      - taskName: company_punishment_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_punishment_info
            dupliCols: new_cid,name,source,desc_file_path,punish_number
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_punishment_info
            cidField: new_cid
            dupliCols: new_cid,name,source,desc_file_path,punish_number
      # ------>
  # ------< Land mortgage: ODPS SQL
  # Flow inc_company_land_mortgage_sql: one task with three parameter sets.
  # The first set leaves DIM_TABLE commented out.
  - project: winhc_test
    flow: inc_company_land_mortgage_sql
    task:
      - taskName: company_land_mortgage
        param:
          - _nodeId: 700003375909
            PROJECT: winhc_eci_dev
            # DIM_TABLE: company_land_mortgage
          - _nodeId: 700003375910
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_mortgage
            DIM_COLUMS: type,id,land_mark,land_num,land_aministrative_area,land_loc,land_area,other_item_num,use_right_num,mortgagor,mortgagee,nature,use_for,use_type,area,evaluate_amount,mortgage_amount,source_url,start_date,end_date,create_time,update_time,deleted
            DUPLI_COLS: new_cid,type,source_url,land_mark,land_num
            MD5_COLS: type,land_num,land_mark,source_url
          - _nodeId: 700003422526
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_mortgage
  # ------>
  # ------< Land transfer: ODPS SQL
  # Flow inc_company_land_transfer_sql: one task with three parameter sets.
  # NOTE(review): the first two sets pin DS to the fixed value 20200717 while
  # the third carries no DS -- confirm the hard-coded date is intentional.
  - project: winhc_test
    flow: inc_company_land_transfer_sql
    task:
      - taskName: company_land_transfer
        param:
          - _nodeId: 700003377079
            PROJECT: winhc_eci_dev
            # DIM_TABLE: company_land_transfer
            DS: 20200717
          - _nodeId: 700003377080
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_transfer
            DIM_COLUMS: type,id,mark,num,location,aministrative_area,user_pre,user_now,area,use_for,use_type,years_of_use,situation,level,merchandise_type,merchandise_price,merchandise_time,url,create_time,update_time,deleted
            DUPLI_COLS: new_cid,type,num,location,url
            MD5_COLS: type,num,location,url
            DS: 20200717
          - _nodeId: 700003421692
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_transfer
  # ------>
  # ------< Public summons notices: Spark
  # Task uses `flag: cids`; its second parameter set targets the `_list` table.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_public_announcement2
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_public_announcement2
            dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_public_announcement2_list
            cidField: new_cid
            dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
  # ------>
  # ------< Chattel mortgage - mortgage info: Spark
  # Single parameter set on the incr_calc_intellectual_without_md5 flow.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_info
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_info
            dupliCols: new_cid,id
            flag: cid
  # ------>
  # ------< Chattel mortgage - mortgagor: Spark
  # Single parameter set on the incr_calc_intellectual_without_md5 flow.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_people
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_people
            dupliCols: new_cid,main_id,id
            flag: cid
  # ------>
  # ------< Chattel mortgage - collateral: Spark
  # Single parameter set on the incr_calc_intellectual_without_md5 flow.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_pawn
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_pawn
            dupliCols: new_cid,main_id,id
            flag: cid
  # ------>
  # ------< Company announcements: Spark
  # Task uses `flag: cid`; both parameter sets name the same table.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_stock_announcement
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_stock_announcement
            dupliCols: new_cid,title,time
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_stock_announcement
            cidField: new_cid
            dupliCols: new_cid,title,time
  # ------>
  # Flow inc_company_mapping: one task with a single parameter set.
  - project: winhc_test
    flow: inc_company_mapping
    task:
      # run-1
      - taskName: inc_company_mapping
        param:
          - _nodeId: 700003457354
            project: winhc_eci_dev
  # Flow inc_company_equity_info: one task with a single parameter set.
  - project: winhc_test
    flow: inc_company_equity_info
    task:
      # run-1
      - taskName: inc_company_equity_info
        param:
          - _nodeId: 700003452779
            project: winhc_eci_dev
  # Flow incr_calc_intellectual: ten tasks, each with two parameter sets
  # (one per node id). Tasks with `flag: cid` reuse their own table name in
  # the second set; tasks with `flag: cids` use the `<table>_list` table there.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_own_tax
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_own_tax
            dupliCols: new_cid,tax_balance,tax_category,tax_num
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_own_tax
            cidField: new_cid
            dupliCols: new_cid,tax_balance,tax_category,tax_num
      # run-1
      - taskName: company_certificate
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_certificate
            dupliCols: new_cid,start_date,end_date,cert_no,type
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_certificate
            cidField: new_cid
            dupliCols: new_cid,start_date,end_date,cert_no,type
      # run-1
      - taskName: company_abnormal_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_abnormal_info
            dupliCols: new_cid,put_reason,put_date
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_abnormal_info
            cidField: new_cid
            dupliCols: new_cid,put_reason,put_date
      # run-1
      - taskName: company_icp
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_icp
            dupliCols: new_cid,liscense,domain
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_icp
            cidField: new_cid
            dupliCols: new_cid,liscense,domain
      # run-1
      - taskName: company_app_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_app_info
            dupliCols: new_cid,name
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_app_info
            cidField: new_cid
            dupliCols: new_cid,name
      # run -1
      - taskName: company_copyright_reg
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_copyright_reg
            dupliCols: new_cid,reg_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_copyright_reg_list
            cidField: new_cid
            dupliCols: new_cid,reg_num
      # run-1
      - taskName: company_wechat
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_wechat
            dupliCols: new_cid,public_num
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_wechat
            cidField: new_cid
            dupliCols: new_cid,public_num
      # run -1
      - taskName: company_tm
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_tm
            dupliCols: new_cid,reg_no
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_tm
            cidField: new_cid
            dupliCols: new_cid,reg_no
      # run -1
      - taskName: company_patent
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_patent
            dupliCols: new_cid,pub_number,app_number
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_patent_list
            cidField: new_cid
            dupliCols: new_cid,pub_number,app_number
      # run-1
      - taskName: company_copyright_works
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_copyright_works
            dupliCols: new_cid,reg_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_copyright_works_list
            cidField: new_cid
            dupliCols: new_cid,reg_num