JobMain.py

# -*- coding: utf-8 -*-
# @Time : 2023/7/20 16:13
# @Author : XuJiakai
# @File : JobMain
# @Software: PyCharm
import asyncio
import json

import aio_pika
from aio_pika import IncomingMessage
from environs import Env

from data_clean.task_distributor import task_distribute
from data_clean.utils.async_client import get_aio_kafka_consumer, get_aio_kafka_producer, get_rabbitmq_connection
from data_clean.utils.asyncio_pool import AsyncPool

env = Env()

base_topic = env.str("base_topic", "rt_other_dim")
source_topic = env.str("source_topic", base_topic)  # e.g. "rt_company_dim"
target_topic = env.str("target_topic", base_topic)  # e.g. "rt_company_dim"
max_concurrency = env.int("concurrency", 1)


async def handle(producer, data: dict):
    # Run the message through the task distributor; forward non-empty results
    result = await task_distribute(data)
    print("send : ", result)
    if result is not None:
        await producer.send_and_wait(target_topic, json.dumps(result).encode())


async def on_message_received(producer, msg: IncomingMessage):
    # Deserialize, process, then ack so RabbitMQ can release the delivery
    data: dict = json.loads(msg.body)
    await handle(producer, data)
    await msg.ack()


async def main_for_rabbitmq():
    print("start job. Listening queue:", source_topic, "send topic:", target_topic,
          "max concurrency:", max_concurrency)
    pool = AsyncPool(max_concurrency)
    producer = get_aio_kafka_producer()
    await producer.start()
    queue_name = source_topic  # the only setting that needs to be configured
    connection = await get_rabbitmq_connection()
    async with connection:
        channel: aio_pika.abc.AbstractChannel = await connection.channel()
        # Limit unacknowledged deliveries to match the worker pool size
        await channel.set_qos(prefetch_count=max_concurrency)

        # Declare the queue
        queue: aio_pika.abc.AbstractQueue = await channel.declare_queue(
            name=queue_name,
            durable=True,
            auto_delete=False
        )

        async with queue.iterator(no_ack=False) as queue_iter:
            # Consuming is cancelled automatically on __aexit__
            async for message in queue_iter:
                message: IncomingMessage = message
                await pool.create_task(on_message_received(producer, message))
                # Alternative: let aio_pika manage the ack/reject cycle:
                # async with message.process(ignore_processed=True):
                #     await pool.create_task(on_message_received(producer, message))


async def main_for_kafka():
    pool = AsyncPool(max_concurrency)
    consumer = get_aio_kafka_consumer(source_topic)
    producer = get_aio_kafka_producer()
    await producer.start()
    await consumer.start()
    try:
        # Consume messages
        async for msg in consumer:
            print("consumed: ", msg.topic, msg.partition, msg.offset,
                  msg.key, msg.value, msg.timestamp)
            data: dict = json.loads(msg.value)
            await pool.create_task(handle(producer, data))
    finally:
        # Leave the consumer group; commit offsets if autocommit is enabled
        await consumer.stop()


if __name__ == '__main__':
    asyncio.run(main_for_rabbitmq())
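
AsyncPool is imported from data_clean.utils.asyncio_pool, which is not shown in this file. Since the consume loops await pool.create_task(...) directly, the pool presumably caps how many messages are processed at once and applies backpressure to the loop. A minimal sketch of such a pool, assuming a semaphore-based design (the real implementation may differ):

import asyncio


class AsyncPool:
    """Bound the number of concurrently running coroutines (hypothetical sketch)."""

    def __init__(self, max_concurrency: int):
        self._semaphore = asyncio.Semaphore(max_concurrency)
        self._tasks: set[asyncio.Task] = set()

    async def create_task(self, coro) -> asyncio.Task:
        # Block until a slot is free, so the caller's consume loop
        # applies backpressure instead of buffering unbounded work.
        await self._semaphore.acquire()
        task = asyncio.create_task(coro)
        self._tasks.add(task)
        task.add_done_callback(self._on_done)
        return task

    def _on_done(self, task: asyncio.Task) -> None:
        self._tasks.discard(task)
        self._semaphore.release()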
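Likewise, the client factories come from data_clean.utils.async_client, also not shown. Assuming the project builds on aiokafka and aio_pika, they might look roughly like the following; kafka_servers, rabbitmq_url, and their defaults are illustrative assumptions, not the project's actual settings:

import aio_pika
from aiokafka import AIOKafkaConsumer, AIOKafkaProducer
from environs import Env

env = Env()

# Hypothetical settings; the real config keys and defaults are not shown in the source
kafka_servers = env.str("kafka_servers", "localhost:9092")
rabbitmq_url = env.str("rabbitmq_url", "amqp://guest:guest@localhost/")


def get_aio_kafka_producer() -> AIOKafkaProducer:
    return AIOKafkaProducer(bootstrap_servers=kafka_servers)


def get_aio_kafka_consumer(topic: str) -> AIOKafkaConsumer:
    return AIOKafkaConsumer(topic, bootstrap_servers=kafka_servers)


async def get_rabbitmq_connection() -> aio_pika.abc.AbstractRobustConnection:
    # connect_robust reconnects automatically if the broker drops the link
    return await aio_pika.connect_robust(rabbitmq_url)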