data_schema_utils.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. # -*- coding: utf-8 -*-
  2. # @Time : 2023/8/22 10:37
  3. # @Author : XuJiakai
  4. # @File : data_schema_utils
  5. # @Software: PyCharm
  6. import asyncio
  7. import json
  8. from data_clean.api.http_api import get
  9. def _cast_by_type(val, val_type: list):
  10. if val == '':
  11. return None
  12. val_type = [i for i in val_type if i != 'null']
  13. val_type = val_type[0]
  14. if val_type == 'array' or val_type == 'object':
  15. return json.loads(val)
  16. if val_type == 'number':
  17. return int(val)
  18. return val
  19. pass
  20. async def get_data_schema(tn: str):
  21. res = await get('https://bigdata-rt.oss-cn-shanghai.aliyuncs.com/business-schema/' + tn + '.schema',
  22. result_json=False)
  23. res = json.loads(res)
  24. # print(res)
  25. return res
  26. pass
  27. async def record_to_json(tn, record_json):
  28. json_schema = await get_data_schema(tn)
  29. json_schema = json_schema['properties']
  30. result_json = {}
  31. for key in record_json:
  32. key_lower = key.lower()
  33. if key_lower in json_schema:
  34. result_json[key_lower] = _cast_by_type(record_json[key], json_schema[key_lower]['type'])
  35. pass
  36. return result_json
  37. pass
  38. async def test():
  39. await get_data_schema("company_court_open_announcement")
  40. pass
  41. if __name__ == '__main__':
  42. asyncio.run(test())
  43. pass