import time
import threading
import json

from rds_model.rds_room_info_request_list import RdsRoomInfoRequestList
from libs.room_info import RoomInfo
from log.print_log import PrintLog


def scrape():
    """Worker loop: pull room IDs from the request list and fetch their info.

    Runs forever. Each iteration takes one room ID from
    ``RdsRoomInfoRequestList``, fetches the room data through
    ``RoomInfo.get_data`` and pushes the JSON-encoded result to the data
    list. On failure (empty response or any exception) a score of 0 is
    recorded and the room ID is pushed back to the request list exactly once.
    """
    rds_list = RdsRoomInfoRequestList()
    while True:
        # Reset every iteration so the error handler never sees an unbound
        # name or a stale ID left over from the previous iteration.
        room_id = None
        try:
            room_id = rds_list.get_request_param()
            if room_id is None:
                # Nothing to do right now; avoid a busy loop.
                time.sleep(0.1)
                continue

            room_id = str(room_id)
            # Reject invalid live-room IDs: only digits are allowed.
            if not room_id.isdigit():
                continue

            PrintLog.print(
                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                + ' ' + room_id + '开始抓取直播信息'
            )
            response_json = RoomInfo.get_data(room_id=room_id)
            if response_json is None:
                rds_list.record_score(0)
                rds_list.push_request_id(room_id)
                PrintLog.print(
                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    + '数据获取失败!响应数据为空!'
                    + '\n'
                    + room_id
                    + '\n'
                )
                # The failure is fully handled above. Without this the code
                # would dereference None below and the except branch would
                # record the failure and re-queue the room a second time.
                continue

            data = json.dumps({
                "data": response_json.get('data').get('room'),
                "extra": {
                    'room_id': room_id
                }
            })
            print('爬取成功')
            rds_list.record_score(1)
            rds_list.push_data_list(data)
        except Exception as e:
            rds_list.record_score(0)
            # Only re-queue when an ID was actually obtained this iteration.
            if room_id is not None:
                rds_list.push_request_id(room_id)
            PrintLog.print(
                time.strftime("%H:%M:%S", time.localtime())
                + ' ' + str(room_id) + '数据异常:' + str(e)
            )
            # NOTE(review): pause placed on the failure path so a persistent
            # error does not spin; confirm this matches the intended pacing.
            time.sleep(0.1)


if __name__ == "__main__":
    print("主方法开始执行")

    rds = RdsRoomInfoRequestList()
    print(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        + ' ' + ' 开始执行,待更新直播队列长度:' + str(rds.get_len())
    )

    # Start the worker threads (range(1, 100) yields 99 of them).
    for i in range(1, 100):
        task = threading.Thread(target=scrape, name=str(i))
        task.start()  # Ready; waits to be scheduled on the CPU.