Browse Source

Merge branch 'master' of http://101.200.220.49:8001/houxiaohua/shop_live_scraper

chenzhiyuan 3 years ago
parent
commit
b2b2951bab
2 changed files with 56 additions and 47 deletions
  1. 49 39
      douyin_hourly_ranklist_commerce_scraper.py
  2. 7 8
      douyin_live_lottery_scraper.py

+ 49 - 39
douyin_hourly_ranklist_commerce_scraper.py

@@ -81,7 +81,7 @@ def get_commerce_rank_list_data(room_id, sec_anchor_id, anchor_id, result):
81 81
     query = {
82 82
         "style" : "3",
83 83
         "hour_info" : "0",
84
-        "room_id" : room_id,
84
+        "room_id" : str(room_id),
85 85
         "rank_type" : "31",
86 86
         "sec_anchor_id" : sec_anchor_id,
87 87
         "webcast_sdk_version" : "1710",
@@ -95,7 +95,7 @@ def get_commerce_rank_list_data(room_id, sec_anchor_id, anchor_id, result):
95 95
         "dpi" : "480",
96 96
         "app_name" : "aweme",
97 97
         "version_name" : "13.0.0",
98
-        "ts" : ts,
98
+        "ts" : str(ts),
99 99
         "cpu_support64" : "true",
100 100
         "storage_type" : "0",
101 101
         "app_type" : "normal",
@@ -106,12 +106,12 @@ def get_commerce_rank_list_data(room_id, sec_anchor_id, anchor_id, result):
106 106
         "channel" : "tengxun_new",
107 107
         "_rticket" : rticket,
108 108
         "device_platform" : "android",
109
-        "iid" : iid,
109
+        "iid" : str(iid),
110 110
         "version_code" : "130000",
111 111
         "mac_address" : mc,
112 112
         "cdid" : "81542dc6-2aca-4ff6-ac58-d94179e9d3e6",
113
-        "openudid" : openudid,
114
-        "device_id" : device_id,
113
+        "openudid" : str(openudid),
114
+        "device_id" : str(device_id),
115 115
         "resolution" : "1080*1800",
116 116
         "os_version" : "6.0",
117 117
         "language" : "zh",
@@ -126,8 +126,7 @@ def get_commerce_rank_list_data(room_id, sec_anchor_id, anchor_id, result):
126 126
 
127 127
     xGorgon = X_Gorgon(query_params, body)
128 128
 
129
-    userAgent = 'okhttp/3.' + str(random.randint(0, 10)) + '.' + str(random.randint(0, 10)) + '.' + str(
130
-        random.randint(1, 10))
129
+    userAgent = f'com.ss.android.ugc.aweme/1300000 (Linux; U; Android 5.1.1; zh_CN; {device_type}; Build/LMY47V; Cronet/58.0.2991.0)'
131 130
 
132 131
     headers = {
133 132
         'Host': domain,
@@ -141,13 +140,14 @@ def get_commerce_rank_list_data(room_id, sec_anchor_id, anchor_id, result):
141 140
         'sdk-version' : '2',
142 141
         'x-ss-dp' : '1128',
143 142
         'x-tt-trace-id' : trace_id,
143
+        'cookie' : cookie
144 144
     }
145 145
 
146 146
     retry = 0
147 147
     response_json = None
148 148
 
149 149
     while True:
150
-        if retry > 50:
150
+        if retry > 0:
151 151
             break
152 152
 
153 153
         retry += 1
@@ -231,42 +231,52 @@ def get_commerce_rank_list_data(room_id, sec_anchor_id, anchor_id, result):
231 231
 def scrape(room_id,sec_anchor_id,anchor_id):
232 232
     rds_list = RdsDouyinHourlyRankList()
233 233
 
234
-    key = 'DOUYIN_SCRAPE_DID_IID_TTREQ_1221'
235
-    rdid = DbRedis.connect().rpop(key)
236
-    if rdid:
237
-        result = rdid.split('@@@')
238
-    else:
239
-        result = []
240
-        return None
234
+    start_time = int(time.time())
241 235
 
242
-    DbRedis.connect().lpush(key, rdid)
236
+    while True:
243 237
 
244
-    try:
245
-        # 带货小时榜
246
-        commerce_response_json = get_commerce_rank_list_data(room_id=room_id, sec_anchor_id=sec_anchor_id, anchor_id=anchor_id, result=result)
247
-        
248
-        if commerce_response_json is None:
238
+        current_time = int(time.time())
239
+        if current_time - start_time >= 9*60:
249 240
             print(
250
-                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 
251
-                + ' 带货小时榜数据获取失败!响应数据为空!' 
252
-            )
241
+                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 
242
+                    + ' 带货小时榜数据获取失败!响应数据为空!' 
243
+                )
253 244
             sys.exit(0)
245
+
246
+        key = 'DOUYIN_SCRAPE_DID_IID_TTREQ_1221'
247
+        rdid = DbRedis.connect().rpop(key)
248
+        if rdid:
249
+            result = rdid.split('@@@')
254 250
         else:
255
-            data = json.dumps({
256
-                "data": commerce_response_json.get('data'),
257
-                "extra": {}
258
-            })
259
-            rds_list.push_commerce_data_list(data)
260
-
261
-    except Exception as e:
262
-        print(
263
-            time.strftime("%H:%M:%S", time.localtime()) 
264
-            + ' ' 
265
-            + '数据异常:' 
266
-            + str(e)
267
-        )
268
-
269
-    sys.exit(0)
251
+            result = []
252
+            return None
253
+
254
+        DbRedis.connect().lpush(key, rdid)
255
+
256
+        try:
257
+            # 带货小时榜
258
+            commerce_response_json = get_commerce_rank_list_data(room_id=room_id, sec_anchor_id=sec_anchor_id, anchor_id=anchor_id, result=result)
259
+            
260
+            if commerce_response_json is None:
261
+                print(
262
+                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 
263
+                    + ' 带货小时榜数据获取失败!响应数据为空!' 
264
+                )
265
+            else:
266
+                data = json.dumps({
267
+                    "data": commerce_response_json.get('data'),
268
+                    "extra": {}
269
+                })
270
+                rds_list.push_commerce_data_list(data)
271
+                sys.exit(0)
272
+
273
+        except Exception as e:
274
+            print(
275
+                time.strftime("%H:%M:%S", time.localtime()) 
276
+                + ' ' 
277
+                + '数据异常:' 
278
+                + str(e)
279
+            )
270 280
 
271 281
 if __name__ == "__main__":
272 282
     print("主方法开始执行")

+ 7 - 8
douyin_live_lottery_scraper.py

@@ -78,13 +78,13 @@ def get_live_lottery_data(room_id, result):
78 78
     device_id, iid, udid, openudid, cookie, V1, V2, device_type, device_brand = result[0], result[1], result[3], result[2], result[4], result[8], result[9], result[10], result[11]
79 79
     
80 80
     query = {
81
-        "room_id" : room_id,
81
+        "room_id" : str(room_id),
82 82
         "webcast_sdk_version" : "1690",
83 83
         "webcast_language" : "zh",
84 84
         "webcast_locale" :"zh_CN",
85 85
         "webcast_gps_access" : "1",
86 86
         "os_api" : "23",
87
-        "device_type" : "SM-G9200",
87
+        "device_type" : device_type,
88 88
         "ssmix" : "a",
89 89
         "manifest_version_code" : "120801",
90 90
         "dpi" : "640",
@@ -102,17 +102,17 @@ def get_live_lottery_data(room_id, result):
102 102
         "_rticket" : rticket,
103 103
         # "_rticket" : "1629688012123",
104 104
         "device_platform" : "android",
105
-        "iid" : iid,
105
+        "iid" : str(iid),
106 106
         "version_code" : "120800",
107 107
         "mac_address" : mc,
108 108
         #"mac_address" : "FC%3ADB%3AB3%3A56%3ABD%3AFD",
109 109
         "cdid" : "6c96979e-c729-419c-9516-3a85a7338d0c",
110
-        "openudid" : openudid,
111
-        "device_id" : device_id,
110
+        "openudid" : str(openudid),
111
+        "device_id" : str(device_id),
112 112
         "resolution" : "1440*2560",
113 113
         "os_version" : "6.0.1",
114 114
         "language" : "zh",
115
-        "device_brand":"samsung",
115
+        "device_brand":device_brand,
116 116
         "aid" : "1128"
117 117
     }
118 118
 
@@ -123,8 +123,7 @@ def get_live_lottery_data(room_id, result):
123 123
 
124 124
     xGorgon = X_Gorgon(query_params, body)
125 125
 
126
-    userAgent = 'okhttp/3.' + str(random.randint(0, 10)) + '.' + str(random.randint(0, 10)) + '.' + str(
127
-        random.randint(1, 10))
126
+    userAgent = userAgent = f'com.ss.android.ugc.aweme/1208000 (Linux; U; Android 5.1.1; zh_CN; {device_type}; Build/LMY47V; Cronet/58.0.2991.0)'
128 127
 
129 128
     headers = {
130 129
         'Host': domain,