"""Threaded scraper for Douyin live-room lottery data.

Worker threads pull room ids from ``RdsRoomLotteryRequestList``, query the
``check_user_right`` endpoint through a proxy using a registered device
taken from Redis, and push successful payloads back through the same list
object.

Usage: ``python <script> <thread_count> <num>``
"""
from rds_model.rds_room_lottery_request_list import RdsRoomLotteryRequestList
import time
import json
import sys
import threading
import random
import urllib
import urllib.parse
import requests
from rds_model.db_redis import DbRedis
from log.print_log import PrintLog
from libs.Xg04 import X_Gorgon
from libs.proxy import Proxy

start_time = int(time.time())

# Maximum value the retry counter may reach before the request loop gives up.
_MAX_RETRY = 30

# Redis list holding JSON-encoded registered devices.
_REGISTER_QUEUE_KEY = "DOUYIN_REGISTER_QUEUE"

_DOMAIN = 'webcast5-normal-ipv6-lf.amemv.com'
_BASE_URL = 'https://' + _DOMAIN + '/webcast/lottery/melon/check_user_right/?'

_USER_AGENT = (
    "com.ss.android.ugc.aweme/1208000 (Linux; U; Android 5.1.1; zh_CN; "
    "HSF-FL00; Build/LMY47V; Cronet/58.0.2991.0)"
)

# Pool of trace ids used for the ``x-tt-trace-id`` request header.
_TRACE_IDS = (
    '00-70f99f2209e0b045dd14266ee1da0468-70f99f2209e0b045-01',
    '00-ce7faf4409b7fcc0ae6135fdd4250468-ce7faf4409b7fcc0-01',
    '00-ce7faf3b09b7fcc0ae6042f1d8100468-ce7faf3b09b7fcc0-01',
    '00-cdd79d2309b7fcc0ae6625a4cb190468-cdd79d2309b7fcc0-01',
    '00-cefde9f009b7fcc0ae6750e1349e0468-cefde9f009b7fcc0-01',
    '00-ced2e6ef09b7fcc0ae67dd7bfe000468-ced2e6ef09b7fcc0-01',
    '00-cefbfeb509b7fcc0ae659396a6ea0468-cefbfeb509b7fcc0-01',
    '00-cefaa25409b7fcc0ae657726a3c30468-cefaa25409b7fcc0-01',
    '00-6eb478cf09b7fcc0ae6e437c9e160468-6eb478cf09b7fcc0-01',
    '00-6eb4752709b7fcc0ae69f22235260468-6eb4752709b7fcc0-01',
    '00-6eb4469109b7fcc0ae6ba6f250a90468-6eb4469109b7fcc0-01',
    '00-6eb43ea209b7fcc0ae66a34128bd0468-6eb43ea209b7fcc0-01',
    '00-98d3c43df91c019bdf0d508485048c4c-98d3c43df91c019b-01',
    '00-a77c64227e911ee70811198281710139-a77c64227e911ee7-01',
    '00-354016da2d2a9c3b31f771521b8c3316-354016da2d2a9c3b-01',
)


def _timestamp():
    """Return the current local time formatted for log lines."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())


def get_random(i, random_type=1):
    """Return a random string of length ``i``.

    :param i: number of characters to generate.
    :param random_type: ``1`` gives a decimal number with exactly ``i``
        digits (no leading zero); ``8`` gives uppercase ASCII letters; any
        other value draws from the characters ``1234567890abcde``.
    :return: the generated string.
    """
    if random_type == 1:
        return str(random.randint(10 ** (i - 1), 10 ** i - 1))
    if random_type == 8:
        seed = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    else:
        # The alphabet stops at 'e'; kept exactly as the original defines it.
        seed = "1234567890abcde"
    return ''.join(random.choice(seed) for _ in range(i))


def get_random_brand_type():
    """Return a random device-type string shaped like ``ABC-DE00``."""
    return get_random(3, random_type=8) + '-' + get_random(2, random_type=8) + '00'


def get_random_trace():
    """Return a randomly generated trace id in the ``00-<x><y>-<x>-01`` layout."""
    random_one = 'c' + get_random(15)
    random_two = 'ae6' + get_random(9) + '0468'
    return '00-' + str(random_one) + str(random_two) + '-' + str(random_one) + '-01'


def get_mc():
    """Return a random MAC-address-like string: six uppercase hex pairs joined by ':'."""
    seed = "1234567890ABCDEF"
    return ':'.join(
        ''.join(random.choice(seed) for _ in range(2))
        for _ in range(6)
    )


def get_trace():
    """Return one trace id picked at random from the fixed pool."""
    return random.choice(_TRACE_IDS)


def _build_query(room_id, iid, openudid, device_id):
    """Build the query-parameter dict for one ``check_user_right`` request."""
    return {
        "room_id": str(room_id),
        "webcast_sdk_version": "1690",
        "webcast_language": "zh",
        "webcast_locale": "zh_CN",
        "webcast_gps_access": "1",
        "os_api": "23",
        "device_type": "HSF-FL00",
        "ssmix": "a",
        "manifest_version_code": "120801",
        "dpi": "640",
        "app_name": "aweme",
        "version_name": "12.8.0",
        "ts": str(int(time.time())),
        "cpu_support64": "true",
        "storage_type": "0",
        "app_type": "normal",
        "appTheme": "dark",
        "ac": "wifi",
        "host_abi": "armeabi-v7a",
        "update_version_code": "12809900",
        "channel": "wandoujia_douyinnew_1128",
        "_rticket": str(int(time.time() * 1000)),
        "device_platform": "android",
        "iid": str(iid),
        "version_code": "120800",
        # Must be the generated string: the original passed the function
        # object itself, which urlencode turned into "<function get_mc ...>".
        "mac_address": get_mc(),
        "cdid": "6c96979e-c729-419c-9516-3a85a7338d0c",
        "openudid": str(openudid),
        "device_id": str(device_id),
        "resolution": "1440*2560",
        "os_version": "6.0.1",
        "language": "zh",
        "device_brand": "HUAWEI",
        "aid": "1128",
    }


def get_live_lottery_data(room_id):
    """Fetch lottery data for a live room, retrying with fresh devices/proxies.

    Each attempt pops a registered device from the Redis queue (and pushes
    it straight back), signs the query string with ``X_Gorgon`` and issues a
    GET through a proxy obtained from ``Proxy.get()``. While the device
    queue is empty the function polls every 0.1 s without consuming a retry.

    :param room_id: id of the live room to query.
    :return: the decoded JSON response when it contains a non-null ``data``
        field, otherwise ``None`` once the retry budget is exhausted.
    """
    room_label = str(room_id)
    retry = 0
    response_json = None

    while retry <= _MAX_RETRY:
        register_device = DbRedis.connect().rpop(_REGISTER_QUEUE_KEY)
        if not register_device:
            # No device available yet: wait and poll again.
            time.sleep(0.1)
            continue
        result = json.loads(register_device)
        # Put the device back so the queue is not drained by readers.
        DbRedis.connect().lpush(_REGISTER_QUEUE_KEY, register_device)
        retry += 1

        trace_id = get_trace()
        device_id, iid, openudid, cookie = (
            result['device_id'], result['iid'], result['openudid'], result['cookie']
        )

        query_params = urllib.parse.urlencode(
            _build_query(room_id, iid, openudid, device_id)
        )
        # Build the URL from the constant base on every attempt; appending to
        # a variable that survives the loop would stack query strings.
        request_url = _BASE_URL + query_params
        body = ""
        x_gorgon = X_Gorgon(query_params, body)

        headers = {
            "Host": _DOMAIN,
            "Connection": "keep-alive",
            "Cache-Control": "max-age=0",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": _USER_AGENT,
            "x-gorgon": x_gorgon.get('X-Gorgon'),
            "x-khronos": x_gorgon.get('X-Khronos'),
            "passport-sdk-version": "17",
            "sdk-version": "2",
            "x-ss-dp": "1128",
            "x-tt-trace-id": trace_id,
            "cookie": cookie,
        }

        proxy = Proxy.get()
        proxies = {
            "http": "http://" + proxy,
            "https": "http://" + proxy,
        }
        context = room_label + 'trace_id:' + trace_id + '\n'

        try:
            response = requests.get(
                request_url,
                headers=headers,
                proxies=proxies,
                timeout=8
            )
            if response.status_code == 200 and response.text:
                payload = response.json()
                if payload.get('data') is not None:
                    print(_timestamp() + ' 数据获取成功!' + '\n' + context)
                    response_json = payload
                    break
                # A payload without data is a failed attempt; it is not kept,
                # so an exhausted retry budget yields None rather than a
                # stale failure response.
                print(
                    _timestamp()
                    + ' 数据获取失败!'
                    + '\n'
                    + context
                    + response.text
                    + str(Proxy.proxy_info)
                )
            else:
                print(
                    _timestamp()
                    + ' 爬取http连接失败!'
                    + str(response.status_code)
                    + '\n'
                    + str(Proxy.proxy_info)
                    + '\n'
                    + context
                    + '爬取结果:'
                    + str(response)
                    + '\n'
                )
                time.sleep(1)
        except requests.exceptions.ProxyError as e:
            print(
                _timestamp()
                + ' 代理过期!'
                + str(e)
                + '\n'
                + context
                + str(Proxy.proxy_info)
            )
            Proxy.del_proxy(proxy)
        except requests.exceptions.ConnectTimeout as e:
            print(
                _timestamp()
                + ' ConnectTimeout!'
                + str(e)
                + '\n'
                + context
                + str(Proxy.proxy_info)
            )
            Proxy.del_proxy(proxy)
        except Exception as e:
            # Best-effort: log and move on to the next attempt.
            print(
                _timestamp()
                + ' 请求抛出异常!'
                + str(e)
                + '\n'
                + context
                + str(Proxy.proxy_info)
            )

    return response_json


def scrape():
    """Worker loop: take room ids from the request list and store lottery data.

    Runs forever. When no room id is available it sleeps one second and
    polls again. On an unexpected exception the room id is pushed back via
    ``push_request_id``.
    """
    rds_list = RdsRoomLotteryRequestList()
    while True:
        # Use this worker's own list object rather than a global that only
        # exists when the file is executed as a script.
        room_info = rds_list.get_request_param()
        if room_info is None:
            time.sleep(1)
            continue

        room_id = str(room_info)
        print(_timestamp() + ' ' + str(room_id))

        try:
            response_json = get_live_lottery_data(room_id)
            if response_json is None:
                print(
                    _timestamp()
                    + ' 数据获取失败!响应数据为空!'
                    + '\n'
                    + room_id
                    + '\n'
                )
            else:
                data = json.dumps({
                    "data": response_json.get('data'),
                    "extra": {
                        'room_id': room_id
                    }
                })
                rds_list.push_data_list(data)
        except Exception as e:
            rds_list.push_request_id(room_id)
            print(
                time.strftime("%H:%M:%S", time.localtime())
                + ' '
                + room_id
                + '数据异常:'
                + str(e)
            )


if __name__ == "__main__":
    print("主方法开始执行")

    # Number of parallel worker threads.
    threading_count = int(sys.argv[1])
    # Second CLI argument is parsed (and therefore required) but not used
    # anywhere in this file.
    num = int(sys.argv[2])

    rds = RdsRoomLotteryRequestList()
    print(
        _timestamp()
        + ' '
        + ' 开始执行,更新直播队列长度:'
        + str(rds.get_len())
    )

    while True:
        sys.stdout.flush()

        # Exclude the main thread from the count of running workers.
        active_count = threading.active_count() - 1
        increment = threading_count - active_count

        # Start at most one new worker per one-second tick until the
        # requested number of threads is running.
        if increment > 0:
            task = threading.Thread(target=scrape, args=())
            task.start()

        time.sleep(1)