店播爬取Python脚本

dy_live_info_update.py 2.0KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. import time
  2. import threading
  3. import json
  4. from rds_model.rds_room_info_request_list import RdsRoomInfoRequestList
  5. from libs.room_info import RoomInfo
  6. from log.print_log import PrintLog
  7. def scrape():
  8. rds_list = RdsRoomInfoRequestList()
  9. while True:
  10. try:
  11. room_id = rds_list.get_request_param()
  12. if room_id is None:
  13. time.sleep(0.1)
  14. continue
  15. room_id = str(room_id)
  16. if not room_id.isdigit(): # 检测直播间ID是否非法-仅包含数字
  17. continue
  18. PrintLog.print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ' + room_id + '开始抓取直播信息')
  19. response_json = RoomInfo.get_data(room_id=room_id)
  20. if response_json is None:
  21. rds_list.record_score(0)
  22. rds_list.push_request_id(room_id)
  23. PrintLog.print(
  24. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '数据获取失败!响应数据为空!' + '\n'
  25. + room_id + '\n'
  26. )
  27. data = json.dumps({
  28. "data": response_json.get('data').get('room'),
  29. "extra": {
  30. 'room_id': room_id
  31. }
  32. })
  33. print('爬取成功')
  34. rds_list.record_score(1)
  35. rds_list.push_data_list(data)
  36. except Exception as e:
  37. rds_list.record_score(0)
  38. rds_list.push_request_id(room_id)
  39. PrintLog.print(time.strftime("%H:%M:%S", time.localtime()) + ' ' + room_id + '数据异常:' + str(e))
  40. time.sleep(0.1)
  41. if __name__ == "__main__":
  42. print("主方法开始执行")
  43. rds = RdsRoomInfoRequestList()
  44. print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ' + ' 开始执行,待更新直播队列长度:' + str(rds.get_len()))
  45. for i in range(1, 100):
  46. task = threading.Thread(target=scrape, name=i)
  47. task.start() # 准备就绪,等待cpu执行