1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- import time
- import threading
- import json
- from rds_model.rds_room_info_request_list import RdsRoomInfoRequestList
- from libs.room_info import RoomInfo
- from log.print_log import PrintLog
def scrape():
    """Run an endless worker loop that scrapes live-room info.

    Each iteration takes one room ID from ``RdsRoomInfoRequestList``,
    fetches its data through ``RoomInfo.get_data`` and pushes the
    JSON-serialised result with ``push_data_list``.  Every attempt is
    scored via ``record_score`` (1 on success, 0 on failure), and a room
    ID whose fetch failed is handed back with ``push_request_id`` exactly
    once.

    The function never returns; it is meant to be the target of a thread.
    """
    rds_list = RdsRoomInfoRequestList()
    while True:
        # Reset per iteration so the except handler can tell whether an ID
        # was actually obtained, instead of hitting an unbound name or
        # re-queuing the ID left over from the previous iteration.
        room_id = None
        try:
            room_id = rds_list.get_request_param()
            if room_id is None:
                # Nothing to do right now; wait briefly before polling again.
                time.sleep(0.1)
                continue
            room_id = str(room_id)
            if not room_id.isdigit():  # Reject illegal room IDs: only digits are valid.
                continue
            PrintLog.print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ' + room_id + '开始抓取直播信息')
            response_json = RoomInfo.get_data(room_id=room_id)
            if response_json is None:
                rds_list.record_score(0)
                rds_list.push_request_id(room_id)
                PrintLog.print(
                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '数据获取失败!响应数据为空!' + '\n'
                    + room_id + '\n'
                )
                # Previously this branch fell through into the payload code,
                # raised AttributeError on None, and the except handler then
                # scored and re-queued the same ID a second time.  Keep the
                # short back-off that path used to get, then move on.
                time.sleep(0.1)
                continue
            payload = response_json.get('data')
            if payload is None:
                # Give the failure a readable reason instead of an opaque
                # "'NoneType' object has no attribute 'get'".
                raise ValueError("response has no 'data' field")
            data = json.dumps({
                "data": payload.get('room'),
                "extra": {
                    'room_id': room_id
                }
            })
            print('爬取成功')
            rds_list.record_score(1)
            rds_list.push_data_list(data)
        except Exception as e:
            rds_list.record_score(0)
            if room_id is not None:
                # Only hand back an ID that was really taken in this iteration.
                rds_list.push_request_id(room_id)
            PrintLog.print(time.strftime("%H:%M:%S", time.localtime()) + ' ' + str(room_id) + '数据异常:' + str(e))
            # NOTE(review): the source's indentation was lost, so it is unclear
            # whether this sleep belonged to the except handler or to the end
            # of every loop iteration; it is kept as failure back-off - confirm.
            time.sleep(0.1)
if __name__ == "__main__":
    # Script entry point: report the current queue length, then start the
    # scraping worker threads.
    print("主方法开始执行")
    rds = RdsRoomInfoRequestList()
    started_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    pending = str(rds.get_len())
    print(started_at + ' ' + ' 开始执行,待更新直播队列长度:' + pending)
    # range(1, 100) yields 1..99; each number is passed as the thread's name.
    for worker_no in range(1, 100):
        # start() only schedules the thread; it runs scrape() when the
        # interpreter gives it CPU time.
        threading.Thread(target=scrape, name=worker_no).start()
|