JobMain.py

# -*- coding: utf-8 -*-
# @Time : 2023/7/20 16:13
# @Author : XuJiakai
# @File : JobMain
# @Software: PyCharm
import asyncio
import json
from functools import partial

import aio_pika
from aio_pika import IncomingMessage
from environs import Env

from data_clean.task_distributor import task_distribute
from data_clean.utils.async_client import get_aio_kafka_consumer, get_aio_kafka_producer, get_rabbitmq_connection
from data_clean.utils.asyncio_pool import AsyncPool

json.dumps = partial(json.dumps, ensure_ascii=False)
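# Note: the partial() above replaces json.dumps for the whole process, so every
# dump in this job keeps non-ASCII characters verbatim (ensure_ascii=False)
# rather than escaping them as \uXXXX sequences.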

env = Env()
base_topic = env.str("base_topic", "rt_other_dim")
source_topic = env.str("source_topic", base_topic)  # "rt_company_dim"
target_topic = env.str("target_topic", base_topic)  # "rt_company_dim"
max_concurrency = env.int("concurrency", 1)
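

# Core pipeline step: route one record through task_distribute() and publish the
# result to the Kafka target topic. If task_distribute() returns None, nothing
# is sent downstream.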
async def handle(producer, data: dict):
    result = await task_distribute(data)
    # print("send : ", result)
    if result is not None:
        await producer.send_and_wait(target_topic, json.dumps(result).encode())
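

# RabbitMQ delivery callback. The message is acked only after handle() completes;
# if processing raises, the ack is skipped and the broker will redeliver the
# message after the channel closes (at-least-once delivery).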
async def on_message_received(producer, msg: IncomingMessage):
    data: dict = json.loads(msg.body)
    await handle(producer, data)
    await msg.ack()
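

# Entry point for the RabbitMQ source. prefetch_count is set to max_concurrency
# so the broker never hands this consumer more unacked messages than the pool
# will run at once (assuming AsyncPool, a project-local helper, caps the number
# of concurrently running tasks at its pool size, which is how it is used here).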
async def main_for_rabbitmq():
    print("start job. Listening queue:", source_topic, "send topic:", target_topic, "max concurrency:",
          max_concurrency)
    pool = AsyncPool(max_concurrency)
    producer = get_aio_kafka_producer()
    await producer.start()
    queue_name = source_topic  # the only setting that needs to be configured
    connection = await get_rabbitmq_connection()
    async with connection:
        channel: aio_pika.abc.AbstractChannel = await connection.channel()
        await channel.set_qos(prefetch_count=max_concurrency)

        # Declaring queue
        queue: aio_pika.abc.AbstractQueue = await channel.declare_queue(
            name=queue_name,
            durable=True,
            auto_delete=False,
        )

        async with queue.iterator(no_ack=False) as queue_iter:
            # Cancel consuming after __aexit__
            async for message in queue_iter:
                message: IncomingMessage = message
                await pool.create_task(on_message_received(producer, message))
                # async with message.process(ignore_processed=True):
                #     await pool.create_task(on_message_received(producer, message))
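

# Alternative entry point that consumes from Kafka instead of RabbitMQ; the
# processing path (handle() -> task_distribute() -> Kafka producer) is the same.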
async def main_for_kafka():
    pool = AsyncPool(max_concurrency)
    consumer = get_aio_kafka_consumer(source_topic)
    producer = get_aio_kafka_producer()
    await producer.start()
    await consumer.start()
    try:
        # Consume messages
        async for msg in consumer:
            print("consumed: ", msg.topic, msg.partition, msg.offset,
                  msg.key, msg.value, msg.timestamp)
            data: dict = json.loads(msg.value)
            await pool.create_task(handle(producer, data))
    finally:
        # Will leave consumer group; perform autocommit if enabled.
        await consumer.stop()


if __name__ == '__main__':
    asyncio.run(main_for_rabbitmq())
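
# A minimal launch sketch (assumed deployment style; the variable names are the
# ones read by env above, the values are purely illustrative):
#
#   source_topic=rt_company_dim target_topic=rt_company_dim concurrency=8 \
#       python JobMain.py
#
# To consume from Kafka instead, switch the entry point to:
#   asyncio.run(main_for_kafka())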