task-step02.yaml 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. # 公司基本信息、招投标、环保处罚、购地信息、地块公示、行政处罚-信用中国、行政处罚、土地抵押、土地转让
  2. job:
  # ------< Company basic information
  - project: winhc_test
    flow: inc_company_spark
    task:
      - taskName: company_inc
        param:
          - _nodeId: 700003381602
            project: winhc_eci_dev
  # ------>
  # Recruitment (job postings)
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_employment
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_employment
            dupliCols: title,new_cid,url_path
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_employment
            cidField: new_cid
            dupliCols: title,new_cid,url_path
  # ------< Bidding / tenders: Spark
  # One incr_calc_intellectual job carrying six tasks. Each task configures the
  # same two nodes (700003375026, then 700003380225) with a table name and the
  # comma-separated dedup columns for that table.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_bid
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_bid
            dupliCols: new_cid,title,link,publish_time
            flag: cids
          # NOTE(review): with flag: cids the second node is pointed at the
          # <table>_list table rather than the base table.
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_bid_list
            cidField: new_cid
            dupliCols: new_cid,title,link,publish_time
      # ------>
      # ------< Environmental penalties
      - taskName: company_env_punishment
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_env_punishment
            dupliCols: new_cid,name,source_url,punish_number
            flag: cid
          # The second node must name this task's own table and dedup columns,
          # exactly as the first node does (flag: cid, so no _list suffix).
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_env_punishment
            cidField: new_cid
            dupliCols: new_cid,name,source_url,punish_number
      # ------>
      # ------< Land purchase information: Spark
      - taskName: company_land_announcement
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_land_announcement
            dupliCols: new_cid,source_url,e_number,project_name
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_land_announcement
            cidField: new_cid
            dupliCols: new_cid,source_url,e_number,project_name
      # ------>
      # ------< Land parcel publicity: Spark
      - taskName: company_land_publicity
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_land_publicity
            dupliCols: new_cid,title,project_name,source_url
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_land_publicity
            cidField: new_cid
            dupliCols: new_cid,title,project_name,source_url
      # ------>
      # ------< Administrative penalties (Credit China): Spark
      - taskName: company_punishment_info_creditchina
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_punishment_info_creditchina
            dupliCols: new_cid,company_name,source,punish_number
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_punishment_info_creditchina
            cidField: new_cid
            dupliCols: new_cid,company_name,source,punish_number
      # ------>
      # ------< Administrative penalties: Spark
      - taskName: company_punishment_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_punishment_info
            dupliCols: new_cid,name,source,desc_file_path,punish_number
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_punishment_info
            cidField: new_cid
            dupliCols: new_cid,name,source,desc_file_path,punish_number
      # ------>
  # ------< Land mortgage: ODPS SQL
  # NOTE(review): parameter keys are upper-case in this flow, and DIM_COLUMS is
  # spelled this way in the original; presumably these match the variable names
  # declared on the flow nodes, so confirm before renaming anything.
  - project: winhc_test
    flow: inc_company_land_mortgage_sql
    task:
      - taskName: company_land_mortgage
        param:
          - _nodeId: 700003375909
            PROJECT: winhc_eci_dev
            # DIM_TABLE: company_land_mortgage
          - _nodeId: 700003375910
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_mortgage
            DIM_COLUMS: type,id,land_mark,land_num,land_aministrative_area,land_loc,land_area,other_item_num,use_right_num,mortgagor,mortgagee,nature,use_for,use_type,area,evaluate_amount,mortgage_amount,source_url,start_date,end_date,create_time,update_time,deleted
            DUPLI_COLS: new_cid,type,source_url,land_mark,land_num
            MD5_COLS: type,land_num,land_mark,source_url
          - _nodeId: 700003422526
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_mortgage
  # ------>
  # ------< Land transfer: ODPS SQL
  # NOTE(review): DS is pinned to the literal 20200717 on the first two nodes
  # and is absent on the third; confirm the fixed value is intentional and not
  # a leftover from a manual run.
  - project: winhc_test
    flow: inc_company_land_transfer_sql
    task:
      - taskName: company_land_transfer
        param:
          - _nodeId: 700003377079
            PROJECT: winhc_eci_dev
            # DIM_TABLE: company_land_transfer
            DS: 20200717
          - _nodeId: 700003377080
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_transfer
            DIM_COLUMS: type,id,mark,num,location,aministrative_area,user_pre,user_now,area,use_for,use_type,years_of_use,situation,level,merchandise_type,merchandise_price,merchandise_time,url,create_time,update_time,deleted
            DUPLI_COLS: new_cid,type,num,location,url
            MD5_COLS: type,num,location,url
            DS: 20200717
          - _nodeId: 700003421692
            PROJECT: winhc_eci_dev
            DIM_TABLE: company_land_transfer
  # ------>
  # ------< Public summons announcements: Spark
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_public_announcement2
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_public_announcement2
            dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
            flag: cids
          # flag: cids, so the second node is given the _list table name.
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_public_announcement2_list
            cidField: new_cid
            dupliCols: new_cid,applicant_cid,owner_cid,drawer_cid,gather_name_cid,bill_num
  # ------>
  # ------< Chattel mortgage - mortgage details: Spark
  # Single-node task: only node 700003483308 is configured for this flow.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_info
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_info
            dupliCols: new_cid,id
            flag: cid
  # ------>
  # ------< Chattel mortgage - mortgagors: Spark
  # Single-node task: only node 700003483308 is configured for this flow.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_people
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_people
            dupliCols: new_cid,main_id,id
            flag: cid
  # ------>
  # ------< Chattel mortgage - collateral: Spark
  # Single-node task: only node 700003483308 is configured for this flow.
  - project: winhc_test
    flow: incr_calc_intellectual_without_md5
    task:
      - taskName: company_mortgage_pawn
        param:
          - _nodeId: 700003483308
            project: winhc_eci_dev
            tableName: company_mortgage_pawn
            dupliCols: new_cid,main_id,id
            flag: cid
  # ------>
  # ------< Company announcements: Spark
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_stock_announcement
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_stock_announcement
            dupliCols: new_cid,title,time
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_stock_announcement
            cidField: new_cid
            dupliCols: new_cid,title,time
  # ------>
  # Incremental company mapping job (single node, project parameter only).
  - project: winhc_test
    flow: inc_company_mapping
    task:
      # run-1
      - taskName: inc_company_mapping
        param:
          - _nodeId: 700003457354
            project: winhc_eci_dev
  # Incremental company equity info job (single node, project parameter only).
  - project: winhc_test
    flow: inc_company_equity_info
    task:
      # run-1
      - taskName: inc_company_equity_info
        param:
          - _nodeId: 700003452779
            project: winhc_eci_dev
  # One incr_calc_intellectual job carrying ten tasks. Each task configures the
  # same two nodes (700003375026, then 700003380225). Tasks with flag: cids give
  # the second node the <table>_list table; tasks with flag: cid reuse the base
  # table name.
  # NOTE(review): the "run-1" / "run -1" markers are kept from the original;
  # their meaning is not evident from this file.
  - project: winhc_test
    flow: incr_calc_intellectual
    task:
      - taskName: company_own_tax
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_own_tax
            dupliCols: new_cid,tax_balance,tax_category,tax_num
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_own_tax
            cidField: new_cid
            dupliCols: new_cid,tax_balance,tax_category,tax_num
      # run-1
      - taskName: company_certificate
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_certificate
            dupliCols: new_cid,start_date,end_date,cert_no,type
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_certificate
            cidField: new_cid
            dupliCols: new_cid,start_date,end_date,cert_no,type
      # run-1
      - taskName: company_abnormal_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_abnormal_info
            dupliCols: new_cid,put_reason,put_date
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_abnormal_info
            cidField: new_cid
            dupliCols: new_cid,put_reason,put_date
      # run-1
      - taskName: company_icp
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_icp
            dupliCols: new_cid,liscense,domain
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_icp
            cidField: new_cid
            dupliCols: new_cid,liscense,domain
      # run-1
      - taskName: company_app_info
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_app_info
            dupliCols: new_cid,name
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_app_info
            cidField: new_cid
            dupliCols: new_cid,name
      # run -1
      - taskName: company_copyright_reg
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_copyright_reg
            dupliCols: new_cid,reg_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_copyright_reg_list
            cidField: new_cid
            dupliCols: new_cid,reg_num
      # run-1
      - taskName: company_wechat
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_wechat
            dupliCols: new_cid,public_num
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_wechat
            cidField: new_cid
            dupliCols: new_cid,public_num
      # run -1
      - taskName: company_tm
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_tm
            dupliCols: new_cid,reg_no
            flag: cid
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_tm
            cidField: new_cid
            dupliCols: new_cid,reg_no
      # run -1
      - taskName: company_patent
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_patent
            dupliCols: new_cid,pub_number,app_number
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_patent_list
            cidField: new_cid
            dupliCols: new_cid,pub_number,app_number
      # run-1
      - taskName: company_copyright_works
        param:
          - _nodeId: 700003375026
            project: winhc_eci_dev
            tableName: company_copyright_works
            dupliCols: new_cid,reg_num
            flag: cids
          - _nodeId: 700003380225
            project: winhc_eci_dev
            tableName: company_copyright_works_list
            cidField: new_cid
            dupliCols: new_cid,reg_num