"""Threaded scraper for the live-room lottery "check_user_right" endpoint.

Run as a script with the desired number of worker threads as the first
command-line argument.  Each worker takes one request from
``RdsRoomLotteryRequestList``, fetches the lottery data for that room through
a proxy and pushes the result back via ``push_data_list``.
"""
from rds_model.rds_room_lottery_request_list import RdsRoomLotteryRequestList
import time
import json
import sys
import threading
import random
import urllib
import urllib.parse
import requests
from rds_model.db_redis import DbRedis
from log.print_log import PrintLog
from libs.Xg04 import X_Gorgon
from libs.proxy import Proxy

# Reference point for the one-hour run limit enforced by the main loop.
start_time = time.time()

# Total number of HTTP attempts made for a single room before giving up.
_MAX_ATTEMPTS = 11
# The supervisor loop stops spawning workers and exits after this many seconds.
_MAX_RUN_SECONDS = 3600


def _now(fmt="%Y-%m-%d %H:%M:%S"):
    """Return the current local time formatted with *fmt*."""
    return time.strftime(fmt, time.localtime())


def get_random(i, random_type=1):
    """Return a random string of length *i*.

    :param i: number of characters to generate.
    :param random_type: ``1`` -> a decimal number with exactly *i* digits
        (no leading zero); ``8`` -> *i* upper-case ASCII letters; any other
        value -> *i* characters drawn from ``"1234567890abcde"``.
    :return: the generated string.
    """
    if random_type == 1:
        return str(random.randint(10 ** (i - 1), 10 ** i - 1))
    if random_type == 8:
        alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    else:
        alphabet = "1234567890abcde"
    return ''.join(random.choice(alphabet) for _ in range(i))


def get_random_brand_type():
    """Return a random model-like string such as ``"ABC-DE00"``."""
    return get_random(3, random_type=8) + '-' + get_random(2, random_type=8) + '00'


def get_mc():
    """Return a random MAC-address-shaped string: six hex pairs joined by ':'."""
    hex_digits = "1234567890ABCDEF"

    def octet():
        return random.choice(hex_digits) + random.choice(hex_digits)

    return ':'.join(octet() for _ in range(6))


def get_live_lottery_data(room_id):
    """Fetch the lottery payload for *room_id*.

    The request is retried through a fresh proxy up to ``_MAX_ATTEMPTS``
    times.  Proxies that raise ``ProxyError`` or ``ConnectTimeout`` are
    reported back with ``Proxy.del_proxy``.

    :param room_id: identifier of the live room (converted to ``str``).
    :return: the decoded JSON response when it contains a non-null ``data``
        field, otherwise ``None`` once all attempts are used up.
    """
    room_id = str(room_id)
    domain = 'webcast5-normal-ipv6-lf.amemv.com'
    rticket = str(int(time.time() * 1000))

    query = {
        "room_id": room_id,
        "webcast_sdk_version": "1690",
        "webcast_language": "zh",
        "webcast_locale": "zh_CN",
        "webcast_gps_access": "1",
        "os_api": "23",
        "device_type": "SM-G9200",
        "ssmix": "a",
        "manifest_version_code": "120801",
        "dpi": "640",
        "app_name": "aweme",
        "version_name": "12.8.0",
        # NOTE(review): fixed timestamp while `_rticket` is generated per
        # call -- confirm the endpoint does not require a current `ts`.
        "ts": "1629688012",
        "cpu_support64": "true",
        "storage_type": "0",
        "app_type": "normal",
        "appTheme": "dark",
        "ac": "wifi",
        "host_abi": "armeabi-v7a",
        "update_version_code": "12809900",
        "channel": "wandoujia_douyinnew_1128",
        "_rticket": rticket,
        "device_platform": "android",
        "iid": "3158219122552359",
        "version_code": "120800",
        # The function must be *called*; passing the function object made
        # urlencode() send its repr.  urlencode() percent-encodes the colons.
        "mac_address": get_mc(),
        "cdid": "6c96979e-c729-419c-9516-3a85a7338d0c",
        "openudid": "89bb178775bf2aa9",
        "device_id": "60314377681",
        "resolution": "1440*2560",
        "os_version": "6.0.1",
        "language": "zh",
        "device_brand": "samsung",
        "aid": "1128",
    }
    query_params = urllib.parse.urlencode(query)
    url = 'https://' + domain + '/webcast/lottery/melon/check_user_right/?' + query_params

    # Presumably computes the request signature headers from the query string
    # and (empty) body -- see libs.Xg04 for the exact contract.
    body = ''
    gorgon = X_Gorgon(query_params, body)

    user_agent = 'okhttp/3.' + '.'.join([
        str(random.randint(0, 10)),
        str(random.randint(0, 10)),
        str(random.randint(1, 10)),
    ])
    headers = {
        'Host': domain,
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': user_agent,
        'accept-encoding': 'gzip, deflate',
        "x-gorgon": gorgon.get('X-Gorgon'),
        "x-khronos": gorgon.get('X-Khronos'),
        'passport-sdk-version': '17',
        'sdk-version': '2',
        'x-ss-dp': '1128',
        'x-tt-trace-id': '00-70f99f2209e0b045dd14266ee1da0468-70f99f2209e0b045-01',
    }

    for _ in range(_MAX_ATTEMPTS):
        proxy = Proxy.get()
        proxies = {
            "http": "http://" + proxy,
            "https": "http://" + proxy,
        }
        try:
            response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
            if response.status_code == 200 and response.text:
                payload = response.json()
                if payload.get('data') is not None:
                    return payload
                # A body without `data` is a failure: log it and retry
                # instead of remembering it as the result.
                print(
                    _now() + ' 数据获取失败!' + '\n' + room_id + '\n'
                    + response.text + str(Proxy.proxy_info)
                )
            else:
                print(
                    _now() + ' 爬取http连接失败!' + str(response.status_code) + '\n'
                    + str(Proxy.proxy_info) + '\n' + room_id + '\n'
                )
            # Short back-off before trying the next proxy.
            time.sleep(1)
        except requests.exceptions.ProxyError as e:
            print(
                _now() + ' 代理过期!' + str(e) + '\n' + room_id + '\n'
                + str(Proxy.proxy_info)
            )
            Proxy.del_proxy(proxy)
        except requests.exceptions.ConnectTimeout as e:
            print(
                _now() + ' ConnectTimeout!' + str(e) + '\n' + room_id + '\n'
                + str(Proxy.proxy_info)
            )
            Proxy.del_proxy(proxy)
        except Exception as e:
            # Best effort: any other failure (bad JSON, read timeout, ...)
            # is logged and the next attempt is made.
            print(
                _now() + ' 请求抛出异常!' + str(e) + '\n' + room_id + '\n'
                + str(Proxy.proxy_info)
            )

    return None


def scrape():
    """Process one queued room: fetch its lottery data and push the result.

    Intended as a ``threading.Thread`` target.  Returns ``None`` in every
    case; failures are logged and end the worker without raising.
    """
    rds_list = RdsRoomLotteryRequestList()
    # Use this worker's own instance rather than a global that only exists
    # when the module is executed as a script.
    room_info = rds_list.get_request_param()
    if room_info is None:
        return None

    room_id = str(json.loads(room_info).get('room_id'))
    print(_now() + ' ' + room_id)

    try:
        response_json = get_live_lottery_data(room_id)
        if response_json is None:
            print(_now() + ' 数据获取失败!响应数据为空!' + '\n' + room_id + '\n')
            # Inside a worker thread sys.exit() only terminates that thread,
            # so a plain return expresses the same thing explicitly.
            return None

        data = json.dumps({
            "data": response_json.get('data'),
            "extra": {
                'room_id': room_id,
            },
        })
        rds_list.push_data_list(data)
    except Exception as e:
        print(_now("%H:%M:%S") + ' ' + room_id + '数据异常:' + str(e))
    return None


def main():
    """Keep ``sys.argv[1]`` worker threads running for up to one hour."""
    print("主方法开始执行")

    # Number of parallel worker threads.
    threading_count = int(sys.argv[1])

    rds = RdsRoomLotteryRequestList()
    print(_now() + ' ' + ' 开始执行,更新直播队列长度:' + str(rds.get_len()))

    while True:
        sys.stdout.flush()

        # Exclude the main thread from the count of running workers.
        active_count = threading.active_count() - 1
        missing = threading_count - active_count
        for _ in range(max(0, missing)):
            threading.Thread(target=scrape, args=()).start()

        if time.time() - start_time > _MAX_RUN_SECONDS:
            print(_now() + ' 主方法执行终止')
            sys.exit(0)

        time.sleep(1)


if __name__ == "__main__":
    main()