# task-step02.yaml
# Sections: company basic information, bidding, environmental penalties,
# land purchase information, land parcel publicity, administrative penalties
# (Credit China), administrative penalties, land mortgage, land transfer
job:
  # ------< Company basic information
  # Flow `inc_company_spark`, task `company_inc`. Its single node
  # (700003381602) receives one parameter, `project`.
  - project: winhc_test
    flow: inc_company_spark
    task:
      - taskName: company_inc
        param:
          - _nodeId: 700003381602
            project: winhc_eci_dev
  # ------>
  # Recruitment (original section label).
  # NOTE(review): only the last task, company_employment, appears to match
  # that label; the flow also carries the dishonest-info, finance,
  # illegal-info and change tasks.
  # Every task below configures the same two nodes: 700003375026 (with
  # `flag: cid`) and 700003380225 (with `cidField: new_cid`). Both nodes name
  # the task's own table and repeat the same dupliCols.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_dishonest_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_dishonest_info
            dupliCols: new_cid,case_no
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_dishonest_info
            cidField: new_cid
            dupliCols: new_cid,case_no
      - taskName: company_finance
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_finance
            dupliCols: new_cid,round,money
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_finance
            cidField: new_cid
            dupliCols: new_cid,round,money
      - taskName: company_illegal_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_illegal_info
            dupliCols: new_cid,put_reason,put_date,put_department
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_illegal_info
            cidField: new_cid
            dupliCols: new_cid,put_reason,put_date,put_department
      - taskName: company_change
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_change
            dupliCols: new_cid,change_item,change_time
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_change
            cidField: new_cid
            dupliCols: new_cid,change_item,change_time
      - taskName: company_employment
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_employment
            dupliCols: title,new_cid,url_path
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_employment
            cidField: new_cid
            dupliCols: title,new_cid,url_path
  # ------< Bidding / tendering: Spark
  # One `incr_calc_intellectual` flow entry carrying six tasks. Each task
  # configures node 700003375026 (with `flag`) and node 700003380225 (with
  # `cidField: new_cid`), and both nodes repeat the same dupliCols.
  #   - `flag: cids` tasks point the second node at the `<table>_list` table.
  #   - `flag: cid` tasks point both nodes at the task's own table.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_bid
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_bid
            dupliCols: new_cid,title,link,publish_time
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_bid_list
            cidField: new_cid
            dupliCols: new_cid,title,link,publish_time
      # ------>
      # ------< Environmental penalties
      - taskName: company_env_punishment
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_env_punishment
            dupliCols: new_cid,name,source_url,punish_number
            flag: cid
          # The second node must name this task's own table and dedup
          # columns, like every other `flag: cid` task in this file. It must
          # not carry the company_land_publicity settings.
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_env_punishment
            cidField: new_cid
            dupliCols: new_cid,name,source_url,punish_number
      # ------>
      # ------< Land purchase information: Spark
      - taskName: company_land_announcement
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_land_announcement
            dupliCols: new_cid,source_url,e_number,project_name
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_land_announcement
            cidField: new_cid
            dupliCols: new_cid,source_url,e_number,project_name
      # ------>
      # ------< Land parcel publicity: Spark
      - taskName: company_land_publicity
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_land_publicity
            dupliCols: new_cid,title,project_name,source_url
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_land_publicity
            cidField: new_cid
            dupliCols: new_cid,title,project_name,source_url
      # ------>
      # ------< Administrative penalties (Credit China): Spark
      - taskName: company_punishment_info_creditchina
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_punishment_info_creditchina
            dupliCols: new_cid,company_name,source,punish_number
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_punishment_info_creditchina
            cidField: new_cid
            dupliCols: new_cid,company_name,source,punish_number
      # ------>
      # ------< Administrative penalties: Spark
      - taskName: company_punishment_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_punishment_info
            dupliCols: new_cid,name,source,desc_file_path,punish_number
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_punishment_info
            cidField: new_cid
            dupliCols: new_cid,name,source,desc_file_path,punish_number
      # ------>
  # ------< Land mortgage: ODPS SQL
  # Flow `inc_company_land_mortgage_sql`, one task, three nodes. This flow
  # uses upper-case parameter keys.
  # NOTE(review): `DIM_COLUMS` and `land_aministrative_area` look misspelled,
  # but are presumably the exact names the consumer and table expect. Verify
  # before renaming.
  - project: winhc_test
    flow: inc_company_land_mortgage_sql
    task:
      - taskName: company_land_mortgage
        param:
          - _nodeId: 700003375909
            PROJECT: winhc_eci_dev
            # DIM_TABLE: company_land_mortgage
          - _nodeId: 700003375910
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_mortgage
            DIM_COLUMS: type,id,land_mark,land_num,land_aministrative_area,land_loc,land_area,other_item_num,use_right_num,mortgagor,mortgagee,nature,use_for,use_type,area,evaluate_amount,mortgage_amount,source_url,start_date,end_date,create_time,update_time,deleted
            DUPLI_COLS: new_cid,type,source_url,land_mark,land_num
            MD5_COLS: type,land_num,land_mark,source_url
          - _nodeId: 700003422526
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_mortgage
  # ------>
  # ------< Land transfer: ODPS SQL
  # Flow `inc_company_land_transfer_sql`, one task, three nodes. This flow
  # uses upper-case parameter keys.
  # NOTE(review): DS is hard-coded to 20200717 on the first two nodes and
  # absent on the third. Confirm this pin is intentional.
  # NOTE(review): `DIM_COLUMS` and `aministrative_area` look misspelled, but
  # are presumably the exact names the consumer and table expect. Verify
  # before renaming.
  - project: winhc_test
    flow: inc_company_land_transfer_sql
    task:
      - taskName: company_land_transfer
        param:
          - _nodeId: 700003377079
            PROJECT: winhc_eci_dev
            # DIM_TABLE: company_land_transfer
            DS: 20200717
          - _nodeId: 700003377080
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_transfer
            DIM_COLUMS: type,id,mark,num,location,aministrative_area,user_pre,user_now,area,use_for,use_type,years_of_use,situation,level,merchandise_type,merchandise_price,merchandise_time,url,create_time,update_time,deleted
            DUPLI_COLS: new_cid,type,num,location,url
            MD5_COLS: type,num,location,url
            DS: 20200717
          - _nodeId: 700003421692
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_transfer
  # ------>
  # ------< Public summons notices: Spark
  # `flag: cids` task: the first node names the base table and the second
  # node names the `_list` table. Both nodes repeat the same dupliCols.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_public_announcement2
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_public_announcement2
            dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_public_announcement2_list
            cidField: new_cid
            dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
  # ------>
  # ------< Chattel mortgage - mortgage information: Spark
  # Flow `incr_calc_intellectual_without_md5`; a single node (700003483308)
  # is configured for this task.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_info
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_info
            dupliCols: new_cid,id
            flag: cid
  # ------>
  # ------< Chattel mortgage - mortgagor: Spark
  # Flow `incr_calc_intellectual_without_md5`; a single node (700003483308)
  # is configured for this task.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_people
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_people
            dupliCols: new_cid,main_id,id
            flag: cid
  # ------>
  # ------< Chattel mortgage - collateral: Spark
  # Flow `incr_calc_intellectual_without_md5`; a single node (700003483308)
  # is configured for this task.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_pawn
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_pawn
            dupliCols: new_cid,main_id,id
            flag: cid
  # ------>
  # ------< Company announcements: Spark
  # `flag: cid` task: both nodes name the same table and the same dupliCols.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_stock_announcement
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_stock_announcement
            dupliCols: new_cid,title,time
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_stock_announcement
            cidField: new_cid
            dupliCols: new_cid,title,time
  # ------>
  # ------< Service-of-process announcements: Spark
  # `flag: cids` task: the first node names the base table and the second
  # node names the `_list` table. Both nodes repeat the same dupliCols.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_send_announcement
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_send_announcement
            dupliCols: new_cid,litigant_cids,start_date,case_no
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_send_announcement_list
            cidField: new_cid
            dupliCols: new_cid,litigant_cids,start_date,case_no
  # ------>
  # ------< Annual report - external guarantees: Spark
  # Flow `incr_calc_intellectual_without_md5`; a single node (700003483308)
  # is configured for this task.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_annual_report_out_guarantee
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_annual_report_out_guarantee
            dupliCols: new_cid,id
            flag: cid
  # ------>
  # Flow `inc_company_mapping`: one task, one node, and `project` is the
  # only parameter.
  - project: winhc_test
    flow: inc_company_mapping
    task:
      # run-1 (original author's marker; its meaning is not documented here)
      - taskName: inc_company_mapping
        param:
          - _nodeId: 700003457354
            project: winhc_eci_dev
  # Flow `inc_company_equity_info`: one task, one node, and `project` is the
  # only parameter.
  - project: winhc_test
    flow: inc_company_equity_info
    task:
      # run-1 (original author's marker; its meaning is not documented here)
      - taskName: inc_company_equity_info
        param:
          - _nodeId: 700003452779
            project: winhc_eci_dev
  # Flow `incr_calc_intellectual` with ten tasks. Each task configures node
  # 700003375026 (with `flag`) and node 700003380225 (with
  # `cidField: new_cid`), and both nodes repeat the same dupliCols.
  #   - `flag: cid` tasks point both nodes at the task's own table.
  #   - `flag: cids` tasks point the second node at the `<table>_list` table.
  # The `run-1` / `run -1` tags are the original author's markers, kept as
  # written; their meaning is not documented in this file.
  # NOTE(review): `liscense` in company_icp looks misspelled, but is
  # presumably the real column name. Verify before renaming.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_own_tax
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_own_tax
            dupliCols: new_cid,tax_balance,tax_category,tax_num
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_own_tax
            cidField: new_cid
            dupliCols: new_cid,tax_balance,tax_category,tax_num
      # run-1
      - taskName: company_certificate
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_certificate
            dupliCols: new_cid,start_date,end_date,cert_no,type
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_certificate
            cidField: new_cid
            dupliCols: new_cid,start_date,end_date,cert_no,type
      # run-1
      - taskName: company_abnormal_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_abnormal_info
            dupliCols: new_cid,put_reason,put_date
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_abnormal_info
            cidField: new_cid
            dupliCols: new_cid,put_reason,put_date
      # run-1
      - taskName: company_icp
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_icp
            dupliCols: new_cid,liscense,domain
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_icp
            cidField: new_cid
            dupliCols: new_cid,liscense,domain
      # run-1
      - taskName: company_app_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_app_info
            dupliCols: new_cid,name
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_app_info
            cidField: new_cid
            dupliCols: new_cid,name
      # run -1
      - taskName: company_copyright_reg
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_copyright_reg
            dupliCols: new_cid,reg_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_copyright_reg_list
            cidField: new_cid
            dupliCols: new_cid,reg_num
      # run-1
      - taskName: company_wechat
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_wechat
            dupliCols: new_cid,public_num
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_wechat
            cidField: new_cid
            dupliCols: new_cid,public_num
      # run -1
      - taskName: company_tm
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_tm
            dupliCols: new_cid,reg_no
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_tm
            cidField: new_cid
            dupliCols: new_cid,reg_no
      # run -1
      - taskName: company_patent
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_patent
            dupliCols: new_cid,pub_number,app_number
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_patent_list
            cidField: new_cid
            dupliCols: new_cid,pub_number,app_number
      # run-1
      - taskName: company_copyright_works
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_copyright_works
            dupliCols: new_cid,reg_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_copyright_works_list
            cidField: new_cid
            dupliCols: new_cid,reg_num