123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373 |
- from rds_model.rds_room_lottery_request_list import RdsRoomLotteryRequestList
- import time
- import json
- import sys
- import threading
- import random
- import urllib
- import requests
- from rds_model.db_redis import DbRedis
- from log.print_log import PrintLog
- from libs.Xg04 import X_Gorgon
- from libs.proxy import Proxy
- start_time = int(time.time())  # Epoch seconds captured at module load; in the visible code only the commented-out one-hour shutdown check in the main loop refers to it.
def get_random(i, random_type=1):
    """Return a random string that is ``i`` characters long.

    Args:
        i: Number of characters to generate.
        random_type: Alphabet selector. ``1`` produces a decimal number with
            exactly ``i`` digits and no leading zero, ``8`` produces
            uppercase ASCII letters, and any other value draws from
            ``"1234567890abcde"``.

    Returns:
        The generated string; empty when ``i`` is zero or negative.
    """
    if i <= 0:
        # The letter branches already produced "" for a non-positive length,
        # but the numeric branch crashed because 10 ** (i - 1) is then a
        # float that randint() cannot accept.
        return ''
    if random_type == 1:
        return str(random.randint(10 ** (i - 1), 10 ** i - 1))
    seed = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" if random_type == 8 else "1234567890abcde"
    # A throwaway loop variable keeps the length parameter from being shadowed.
    return ''.join(random.choice(seed) for _ in range(i))
def get_random_brand_type():
    """Build a random string shaped like ``ABC-DE00``.

    Returns:
        Three random uppercase letters, a dash, two random uppercase letters
        and the literal suffix ``00``.
    """
    leading_letters = get_random(3, random_type=8)
    trailing_letters = get_random(2, random_type=8)
    return leading_letters + '-' + trailing_letters + '00'
def get_random_trace():
    """Build a random trace id of the form ``00-<head><tail>-<head>-01``.

    ``<head>`` is ``c`` followed by 15 random digits; ``<tail>`` is ``ae6``,
    9 random digits and the literal ``0468``.

    Returns:
        The assembled trace id string.
    """
    head = 'c' + get_random(15)
    tail = 'ae6' + get_random(9) + '0468'
    return '-'.join(['00', head + tail, head, '01'])
def get_mc():
    """Generate a random MAC-address-style string.

    Returns:
        Six colon-separated groups, each made of two characters drawn from
        ``"1234567890ABCDEF"``, e.g. ``"3A:0F:B2:9C:44:D1"``.
    """
    hex_chars = "1234567890ABCDEF"
    groups = []
    for _ in range(6):
        # Two independent draws per group, in the same order as before.
        first = random.choice(hex_chars)
        second = random.choice(hex_chars)
        groups.append(first + second)
    return ':'.join(groups)
def get_trace():
    """Pick one trace id at random from a fixed pool of recorded values.

    Returns:
        One of the hard-coded ``00-<32 hex>-<16 hex>-01`` strings below.
    """
    known_traces = [
        '00-70f99f2209e0b045dd14266ee1da0468-70f99f2209e0b045-01',
        '00-ce7faf4409b7fcc0ae6135fdd4250468-ce7faf4409b7fcc0-01',
        '00-ce7faf3b09b7fcc0ae6042f1d8100468-ce7faf3b09b7fcc0-01',
        '00-cdd79d2309b7fcc0ae6625a4cb190468-cdd79d2309b7fcc0-01',
        '00-cefde9f009b7fcc0ae6750e1349e0468-cefde9f009b7fcc0-01',
        '00-ced2e6ef09b7fcc0ae67dd7bfe000468-ced2e6ef09b7fcc0-01',
        '00-cefbfeb509b7fcc0ae659396a6ea0468-cefbfeb509b7fcc0-01',
        '00-cefaa25409b7fcc0ae657726a3c30468-cefaa25409b7fcc0-01',
        '00-6eb478cf09b7fcc0ae6e437c9e160468-6eb478cf09b7fcc0-01',
        '00-6eb4752709b7fcc0ae69f22235260468-6eb4752709b7fcc0-01',
        '00-6eb4469109b7fcc0ae6ba6f250a90468-6eb4469109b7fcc0-01',
        '00-6eb43ea209b7fcc0ae66a34128bd0468-6eb43ea209b7fcc0-01',
        '00-98d3c43df91c019bdf0d508485048c4c-98d3c43df91c019b-01',
        '00-a77c64227e911ee70811198281710139-a77c64227e911ee7-01',
        '00-354016da2d2a9c3b31f771521b8c3316-354016da2d2a9c3b-01',
    ]
    chosen = random.choice(known_traces)
    return chosen
def get_live_lottery_data(room_id):
    """Request the lottery ``check_user_right`` payload for a live room.

    Every attempt pops a registered device from the ``DOUYIN_REGISTER_QUEUE``
    Redis list (pushing it back on the other end), builds the query string,
    signs it with ``X_Gorgon`` and sends a GET through a proxy obtained from
    ``Proxy.get()``. The loop ends on the first response whose JSON carries
    a non-null ``data`` field, or once more than 30 attempts have been made.

    Args:
        room_id: Live room identifier; converted to ``str`` before use so
            that logging never fails on a non-string id.

    Returns:
        The decoded JSON of the most recent response that could be parsed,
        or ``None`` when no attempt yielded parseable JSON.
    """
    # A bare `import urllib` does not guarantee that the `parse` submodule is
    # loaded, so import the function explicitly.
    from urllib.parse import urlencode

    def now():
        # Timestamp prefix shared by every log line in this function.
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    room_id = str(room_id)
    domain = 'webcast5-normal-ipv6-lf.amemv.com'
    base_url = 'https://' + domain + '/webcast/lottery/melon/check_user_right/?'
    device_queue_key = "DOUYIN_REGISTER_QUEUE"
    user_agent = "com.ss.android.ugc.aweme/1208000 (Linux; U; Android 5.1.1; zh_CN; HSF-FL00; Build/LMY47V; Cronet/58.0.2991.0)"
    retry = 0
    response_json = None

    while retry <= 30:
        register_device = DbRedis.connect().rpop(device_queue_key)
        if not register_device:
            # NOTE(review): an empty device queue is polled indefinitely and
            # does not consume the retry budget — confirm this is intended.
            time.sleep(0.1)
            continue
        result = json.loads(register_device)
        # Put the device back so other workers can reuse it.
        DbRedis.connect().lpush(device_queue_key, register_device)

        retry += 1
        rticket = str(int(time.time() * 1000))
        trace_id = get_trace()
        device_id = result['device_id']
        iid = result['iid']
        openudid = result['openudid']
        cookie = result['cookie']
        # Common suffix of every log message below.
        log_tag = room_id + 'trace_id:' + trace_id

        query = {
            "room_id": room_id,
            "webcast_sdk_version": "1690",
            "webcast_language": "zh",
            "webcast_locale": "zh_CN",
            "webcast_gps_access": "1",
            "os_api": "23",
            "device_type": "HSF-FL00",
            "ssmix": "a",
            "manifest_version_code": "120801",
            "dpi": "640",
            "app_name": "aweme",
            "version_name": "12.8.0",
            "ts": str(int(time.time())),
            "cpu_support64": "true",
            "storage_type": "0",
            "app_type": "normal",
            "appTheme": "dark",
            "ac": "wifi",
            "host_abi": "armeabi-v7a",
            "update_version_code": "12809900",
            "channel": "wandoujia_douyinnew_1128",
            "_rticket": rticket,
            "device_platform": "android",
            "iid": str(iid),
            "version_code": "120800",
            # Call the generator: the bare function object used to be
            # url-encoded as "<function get_mc at 0x...>".
            "mac_address": get_mc(),
            "cdid": "6c96979e-c729-419c-9516-3a85a7338d0c",
            "openudid": str(openudid),
            "device_id": str(device_id),
            "resolution": "1440*2560",
            "os_version": "6.0.1",
            "language": "zh",
            "device_brand": "HUAWEI",
            "aid": "1128",
        }
        query_params = urlencode(query)
        # Build the URL afresh for each attempt; appending to the base URL in
        # place made every retry carry all previous query strings as well.
        request_url = base_url + query_params
        body = ""
        x_gorgon = X_Gorgon(query_params, body)
        headers = {
            "Host": domain,
            "Connection": "keep-alive",
            "Cache-Control": "max-age=0",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": user_agent,
            "x-gorgon": x_gorgon.get('X-Gorgon'),
            "x-khronos": x_gorgon.get('X-Khronos'),
            "passport-sdk-version": "17",
            "sdk-version": "2",
            "x-ss-dp": "1128",
            "x-tt-trace-id": trace_id,
            "cookie": cookie,
        }
        proxy = Proxy.get()
        proxies = {
            "http": "http://" + proxy,
            "https": "http://" + proxy,
        }
        try:
            response = requests.get(
                request_url,
                headers=headers,
                proxies=proxies,
                timeout=8,
            )
            if response.status_code == 200 and response.text:
                response_json = response.json()
                if response_json.get('data') is not None:
                    print(
                        now()
                        + ' 数据获取成功!'
                        + '\n'
                        + log_tag
                        + '\n'
                    )
                    break
                print(
                    now()
                    + ' 数据获取失败!'
                    + '\n'
                    + log_tag
                    + '\n'
                    + response.text
                    + Proxy.proxy_info
                )
            else:
                print(
                    now()
                    + ' 爬取http连接失败!'
                    + str(response.status_code)
                    + '\n'
                    + Proxy.proxy_info
                    + '\n'
                    + log_tag
                    + '\n'
                    + '爬取结果:' + str(response)
                    + '\n'
                )
                # NOTE(review): the source lost its indentation; this pause is
                # assumed to belong to the HTTP-failure branch — confirm.
                time.sleep(1)
        except requests.exceptions.ProxyError as e:
            print(
                now()
                + ' 代理过期!'
                + str(e)
                + '\n'
                + log_tag
                + '\n'
                + Proxy.proxy_info
            )
            # Drop the proxy so later attempts do not pick it again.
            Proxy.del_proxy(proxy)
        except requests.exceptions.ConnectTimeout as e:
            print(
                now()
                + ' ConnectTimeout!'
                + str(e)
                + '\n'
                + log_tag
                + '\n'
                + Proxy.proxy_info
            )
            Proxy.del_proxy(proxy)
        except Exception as e:
            # Best-effort scraping: log anything else and move on to the
            # next attempt.
            print(
                now()
                + ' 请求抛出异常!'
                + str(e)
                + '\n'
                + log_tag
                + '\n'
                + Proxy.proxy_info
            )

    # NOTE(review): when the retry budget runs out this can still be a parsed
    # response without a "data" field — callers should check for that.
    return response_json
def scrape():
    """Worker loop: take room ids from the request list and store results.

    Runs forever. For each room id obtained from
    ``RdsRoomLotteryRequestList.get_request_param()`` it calls
    ``get_live_lottery_data`` and pushes a JSON document containing the
    response's ``data`` field and the room id via ``push_data_list``. When
    no request is available it sleeps for one second. If fetching or
    storing raises, the room id is handed back through ``push_request_id``.
    """
    rds_list = RdsRoomLotteryRequestList()
    while True:
        # Read from the instance owned by this worker; the module-level
        # `rds` only exists when the file is run as a script.
        room_info = rds_list.get_request_param()
        if room_info is None:
            time.sleep(1)
            continue

        room_id = str(room_info)
        print(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            + ' '
            + room_id
        )
        try:
            response_json = get_live_lottery_data(room_id)
            if response_json is None:
                print(
                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    + ' 数据获取失败!响应数据为空!'
                    + '\n'
                    + room_id
                    + '\n'
                )
            else:
                data = json.dumps({
                    "data": response_json.get('data'),
                    "extra": {
                        'room_id': room_id
                    }
                })
                rds_list.push_data_list(data)
        except Exception as e:
            # Hand the room id back so the request is not lost, then keep
            # the worker alive.
            rds_list.push_request_id(room_id)
            print(
                time.strftime("%H:%M:%S", time.localtime())
                + ' '
                + room_id
                + '数据异常:'
                + str(e)
            )
if __name__ == "__main__":
    # Script entry point: keep a fixed number of scrape() worker threads
    # alive, checking once per second and starting at most one per check.
    print("主方法开始执行")

    # Target number of parallel worker threads (first CLI argument).
    threading_count = int(sys.argv[1])
    # Second CLI argument; parsed but not used anywhere in the visible code.
    num = int(sys.argv[2])

    rds = RdsRoomLotteryRequestList()
    startup_stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    queue_length = str(rds.get_len())
    print(startup_stamp + ' ' + ' 开始执行,更新直播队列长度:' + queue_length)

    while True:
        sys.stdout.flush()

        # Exclude the main thread from the count of live workers.
        workers_alive = threading.active_count() - 1

        if workers_alive < threading_count:
            sys.stdout.flush()
            worker = threading.Thread(target=scrape, args=())
            worker.start()  # Ready to run; waits to be scheduled.

        # A one-hour self-termination check based on start_time used to sit
        # here and is currently disabled.
        time.sleep(1)
|