JobMain.py

# -*- coding: utf-8 -*-
# @Time : 2023/7/20 16:13
# @Author : XuJiakai
# @File : JobMain
# @Software: PyCharm
import asyncio
import json
from functools import partial

import aio_pika
from aio_pika import IncomingMessage
from environs import Env
from loguru import logger as log

from data_clean.task_distributor import task_distribute
from data_clean.utils.async_client import get_aio_kafka_consumer, get_aio_kafka_producer, get_rabbitmq_connection
from data_clean.utils.asyncio_pool import AsyncPool

# Keep non-ASCII characters readable in all JSON produced by this job.
json.dumps = partial(json.dumps, ensure_ascii=False)

env = Env()
base_topic = env.str("base_topic", "rt_other_dim")
source_topic = env.str("source_topic", base_topic)  # e.g. "rt_company_dim"
target_topic = env.str("target_topic", base_topic)  # e.g. "rt_company_dim"
max_concurrency = env.int("concurrency", 1)

async def handle(producer, data: dict):
    """Run one record through the cleaning pipeline and forward the result."""
    result = await task_distribute(data)
    # print("send : ", result)
    if result is not None:
        await producer.send_and_wait(target_topic, json.dumps(result).encode())

async def on_message_received(producer, msg: IncomingMessage):
    """Decode a RabbitMQ message, process it, then ack it."""
    data: dict = json.loads(msg.body)
    await handle(producer, data)
    await msg.ack()

async def main_for_rabbitmq():
    """Consume from a RabbitMQ queue and publish cleaned records to Kafka."""
    log.info("start job. Listening queue: {}, send topic: {}, max concurrency: {}",
             source_topic, target_topic, max_concurrency)
    pool = AsyncPool(max_concurrency)
    producer = get_aio_kafka_producer()
    await producer.start()
    queue_name = source_topic  # the only setting that needs to be configured
    connection = await get_rabbitmq_connection()
    async with connection:
        channel: aio_pika.abc.AbstractChannel = await connection.channel()
        await channel.set_qos(prefetch_count=max_concurrency)

        # Declaring queue
        queue: aio_pika.abc.AbstractQueue = await channel.declare_queue(
            name=queue_name,
            durable=True,
            auto_delete=False
        )

        async with queue.iterator(no_ack=False) as queue_iter:
            # Cancel consuming after __aexit__
            async for message in queue_iter:
                message: IncomingMessage = message
                await pool.create_task(on_message_received(producer, message))
                # async with message.process(ignore_processed=True):
                #     await pool.create_task(on_message_received(producer, message))

async def main_for_kafka():
    """Consume from a Kafka topic and publish cleaned records to Kafka."""
    pool = AsyncPool(max_concurrency)
    consumer = get_aio_kafka_consumer(source_topic)
    producer = get_aio_kafka_producer()
    await producer.start()
    await consumer.start()
    try:
        # Consume messages
        async for msg in consumer:
            print("consumed: ", msg.topic, msg.partition, msg.offset,
                  msg.key, msg.value, msg.timestamp)
            data: dict = json.loads(msg.value)
            await pool.create_task(handle(producer, data))
    finally:
        # Will leave consumer group; perform autocommit if enabled.
        await consumer.stop()

if __name__ == '__main__':
    asyncio.run(main_for_rabbitmq())
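
Note: AsyncPool is imported from data_clean.utils.asyncio_pool, whose source is not shown here. Judging only from how it is used above (AsyncPool(max_concurrency) and `await pool.create_task(coro)`), it acts as a concurrency-bounded task pool. The following is a minimal sketch of such a pool, assuming a semaphore-based limit; the real implementation may differ.

# Reference sketch only -- not part of JobMain.py.
import asyncio


class AsyncPool:
    """Bounded task pool: at most `max_concurrency` coroutines run at once (assumed behavior)."""

    def __init__(self, max_concurrency: int):
        self._sem = asyncio.Semaphore(max_concurrency)

    async def create_task(self, coro) -> asyncio.Task:
        # Wait until a slot is free, then schedule the coroutine.
        await self._sem.acquire()
        task = asyncio.get_running_loop().create_task(coro)
        # Release the slot when the task finishes, whether it succeeds or fails.
        task.add_done_callback(lambda _t: self._sem.release())
        return task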