"""Scrape lottery ("melon") data for live rooms and push it to a Redis-backed list.

Worker threads pull room ids from ``RdsRoomLotteryRequestList``, borrow a
device record from a Redis list, query the webcast endpoint through a proxy
and push the JSON result back through ``RdsRoomLotteryRequestList``.
"""
from rds_model.rds_room_lottery_request_list import RdsRoomLotteryRequestList
import time
import json
import sys
import threading
import random
import urllib
import urllib.parse
import requests
from rds_model.db_redis import DbRedis
from log.print_log import PrintLog
from libs.Xg04 import X_Gorgon
from libs.proxy import Proxy

# Process start time; the main loop exits once an hour has elapsed.
start_time = time.time()

# Number of HTTP attempts made per room before giving up.
_MAX_ATTEMPTS = 4

# Redis list holding '@@@'-separated device records.
_DEVICE_KEY = 'DOUYIN_SCRAPE_DID_IID_TTREQ_1221'

# Pool of x-tt-trace-id header values; one is picked at random per request.
_TRACE_IDS = (
    '00-70f99f2209e0b045dd14266ee1da0468-70f99f2209e0b045-01',
    '00-ce7faf4409b7fcc0ae6135fdd4250468-ce7faf4409b7fcc0-01',
    '00-ce7faf3b09b7fcc0ae6042f1d8100468-ce7faf3b09b7fcc0-01',
    '00-cdd79d2309b7fcc0ae6625a4cb190468-cdd79d2309b7fcc0-01',
    '00-cefde9f009b7fcc0ae6750e1349e0468-cefde9f009b7fcc0-01',
    '00-ced2e6ef09b7fcc0ae67dd7bfe000468-ced2e6ef09b7fcc0-01',
    '00-cefbfeb509b7fcc0ae659396a6ea0468-cefbfeb509b7fcc0-01',
    '00-cefaa25409b7fcc0ae657726a3c30468-cefaa25409b7fcc0-01',
    '00-6eb478cf09b7fcc0ae6e437c9e160468-6eb478cf09b7fcc0-01',
    '00-6eb4752709b7fcc0ae69f22235260468-6eb4752709b7fcc0-01',
    '00-6eb4469109b7fcc0ae6ba6f250a90468-6eb4469109b7fcc0-01',
    '00-6eb43ea209b7fcc0ae66a34128bd0468-6eb43ea209b7fcc0-01',
)


def _now():
    """Return the local time formatted as 'YYYY-MM-DD HH:MM:SS' for log lines."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())


def get_random(i, random_type=1):
    """Return a random string of length ``i``.

    random_type == 1: an ``i``-digit decimal number (no leading zero).
    random_type == 8: ``i`` uppercase ASCII letters.
    anything else:    ``i`` characters drawn from "1234567890abcde".
    """
    if random_type == 1:
        return str(random.randint(10 ** (i - 1), 10 ** i - 1))
    if random_type == 8:
        seed = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    else:
        seed = "1234567890abcde"
    return ''.join(random.choice(seed) for _ in range(i))


def get_random_brand_type():
    """Return a random string shaped like 'ABC-DE00'."""
    return get_random(3, random_type=8) + '-' + get_random(2, random_type=8) + '00'


def get_random_trace():
    """Return a randomly generated trace id shaped like the entries of ``_TRACE_IDS``."""
    random_one = 'c' + get_random(15)
    random_two = 'ae6' + get_random(9) + '0468'
    return '00-' + random_one + random_two + '-' + random_one + '-01'


def get_mc():
    """Return a random MAC-address-like string: six uppercase hex pairs joined by ':'."""
    seed = "1234567890ABCDEF"
    return ':'.join(
        ''.join(random.choice(seed) for _ in range(2))
        for _ in range(6)
    )


def get_trace():
    """Return one trace id picked at random from the fixed pool."""
    return random.choice(_TRACE_IDS)


def get_live_lottery_data(room_id, result):
    """Query the lottery check_user_right endpoint for one room.

    Args:
        room_id: Room identifier; coerced to ``str`` for the query and logs.
        result: Sequence of device fields (a Redis record split on '@@@').
            Indices 0, 1, 2, 10 and 11 are read as device_id, iid, openudid,
            device_type and device_brand; the record must have at least 12
            fields.

    Returns:
        The decoded JSON body of the last HTTP 200 response with a non-empty
        body, or ``None`` if no attempt produced one.
        NOTE(review): when every attempt returns JSON without a ``data`` key,
        that last JSON (not ``None``) is returned; this mirrors the original
        behaviour, confirm it is intended.
    """
    room_id = str(room_id)  # log lines below concatenate it with strings
    domain = 'webcast5-normal-ipv6-lf.amemv.com'
    url = 'https://' + domain + '/webcast/lottery/melon/check_user_right/?'
    rticket = str(int(time.time() * 1000))
    mc = get_mc()  # fixed: previously the function object itself was sent
    trace_id = get_trace()

    device_id, iid, openudid = result[0], result[1], result[2]
    device_type, device_brand = result[10], result[11]

    query = {
        "room_id": room_id,
        "webcast_sdk_version": "1690",
        "webcast_language": "zh",
        "webcast_locale": "zh_CN",
        "webcast_gps_access": "1",
        "os_api": "23",
        "device_type": device_type,
        "ssmix": "a",
        "manifest_version_code": "120801",
        "dpi": "640",
        "app_name": "aweme",
        "version_name": "12.8.0",
        "ts": int(time.time()),
        "cpu_support64": "true",
        "storage_type": "0",
        "app_type": "normal",
        "appTheme": "dark",
        "ac": "wifi",
        "host_abi": "armeabi-v7a",
        "update_version_code": "12809900",
        "channel": "wandoujia_douyinnew_1128",
        "_rticket": rticket,
        "device_platform": "android",
        "iid": str(iid),
        "version_code": "120800",
        "mac_address": mc,
        "cdid": "6c96979e-c729-419c-9516-3a85a7338d0c",
        "openudid": str(openudid),
        "device_id": str(device_id),
        "resolution": "1440*2560",
        "os_version": "6.0.1",
        "language": "zh",
        "device_brand": device_brand,
        "aid": "1128",
    }
    query_params = urllib.parse.urlencode(query)
    url = url + query_params

    body = ''
    # The signature headers are computed over the exact query string sent.
    xGorgon = X_Gorgon(query_params, body)
    userAgent = f'com.ss.android.ugc.aweme/1208000 (Linux; U; Android 5.1.1; zh_CN; {device_type}; Build/LMY47V; Cronet/58.0.2991.0)'
    headers = {
        'Host': domain,
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': userAgent,
        "x-gorgon": xGorgon.get('X-Gorgon'),
        "x-khronos": xGorgon.get('X-Khronos'),
        'passport-sdk-version': '17',
        'sdk-version': '2',
        'x-ss-dp': '1128',
        'x-tt-trace-id': trace_id,
    }

    response_json = None
    for _ in range(_MAX_ATTEMPTS):
        # A fresh proxy is requested for every attempt.
        proxy = Proxy.dailiyun_get()
        proxies = {
            "http": "http://" + proxy,
            "https": "http://" + proxy,
        }
        try:
            response = requests.get(
                url,
                headers=headers,
                proxies=proxies,
                timeout=8
            )
            if response.status_code == 200 and response.text:
                response_json = response.json()
                if response_json.get('data') is not None:
                    print(
                        _now()
                        + ' 数据获取成功!'
                        + '\n'
                        + room_id
                        + '\n'
                    )
                    break
                print(
                    _now()
                    + ' 数据获取失败!'
                    + '\n'
                    + room_id
                    + '\n'
                    + response.text
                    + Proxy.proxy_info
                )
            else:
                print(
                    _now()
                    + ' 爬取http连接失败!'
                    + str(response.status_code)
                    + '\n'
                    + Proxy.proxy_info
                    + '\n'
                    + room_id
                    + 'trace_id:'
                    + trace_id
                    + '\n'
                    + '爬取结果:'
                    + str(response)
                    + '\n'
                )
                time.sleep(1)
        except requests.exceptions.ProxyError as e:
            print(
                _now()
                + ' 代理过期!'
                + str(e)
                + '\n'
                + room_id
                + '\n'
                + Proxy.proxy_info
            )
            # Drop the dead proxy so it is not handed out again.
            Proxy.dailiyun_del_proxy(proxy)
        except requests.exceptions.ConnectTimeout as e:
            print(
                _now()
                + ' ConnectTimeout!'
                + str(e)
                + '\n'
                + room_id
                + '\n'
                + Proxy.proxy_info
            )
            Proxy.dailiyun_del_proxy(proxy)
        except Exception as e:
            # Best-effort: log and move on to the next attempt.
            print(
                _now()
                + ' 请求抛出异常!'
                + str(e)
                + '\n'
                + room_id
                + '\n'
                + Proxy.proxy_info
            )

    return response_json


def scrape():
    """Worker loop: fetch lottery data for queued rooms until the queue is empty.

    For each room a device record is popped from the right of the Redis list
    and pushed back on the left, so records are reused in rotation. Successful
    responses are pushed to the data list; failures re-queue the room id.

    Returns:
        ``None`` when the request queue is empty or no device record is
        available.
        NOTE(review): when no device record is available the current room is
        not re-queued before returning (unchanged from the original).
    """
    rds_list = RdsRoomLotteryRequestList()
    while True:
        # Use this worker's own handle rather than the ``rds`` global, which
        # only exists when the file is run as a script.
        room_info = rds_list.get_request_param()
        if room_info is None:
            return None

        room_info = json.loads(room_info)
        room_id = str(room_info.get('room_id'))
        print(
            _now()
            + ' '
            + str(room_id)
        )

        rdid = DbRedis.connect().rpop(_DEVICE_KEY)
        if not rdid:
            return None
        result = rdid.split('@@@')
        DbRedis.connect().lpush(_DEVICE_KEY, rdid)

        try:
            response_json = get_live_lottery_data(room_id, result)
            if response_json is None:
                print(
                    _now()
                    + ' 数据获取失败!响应数据为空!'
                    + '\n'
                    + room_id
                    + '\n'
                )
                # Re-queue explicitly instead of relying on the
                # AttributeError that ``None.get`` used to trigger.
                rds_list.push_request_id(room_id)
                continue

            data = json.dumps({
                "data": response_json.get('data'),
                "extra": {
                    'room_id': room_id
                }
            })
            rds_list.push_data_list(data)
        except Exception as e:
            # Any failure puts the room back on the request queue.
            rds_list.push_request_id(room_id)
            print(
                time.strftime("%H:%M:%S", time.localtime())
                + ' '
                + room_id
                + '数据异常:'
                + str(e)
            )


if __name__ == "__main__":
    print("主方法开始执行")

    # Number of parallel worker threads (first command-line argument).
    threading_count = int(sys.argv[1])
    rds = RdsRoomLotteryRequestList()
    print(
        _now()
        + ' '
        + ' 开始执行,更新直播队列长度:'
        + str(rds.get_len())
    )

    while True:
        sys.stdout.flush()

        # Exclude the main thread from the count.
        active_count = threading.active_count() - 1
        increment = threading_count - active_count

        # At most one new worker is started per one-second tick.
        if increment > 0:
            sys.stdout.flush()
            task = threading.Thread(target=scrape, args=())
            task.start()  # ready; waits to be scheduled

        current_time = time.time()
        if current_time - start_time > 3600:
            print(
                _now()
                + ' 主方法执行终止'
            )
            sys.exit(0)

        time.sleep(1)