Browse Source

Merge branch 'master' of http://101.200.220.49:8001/houxiaohua/shop_live_scraper

chenzhiyuan 3 years ago
parent
commit
b2b2951bab
2 changed files with 56 additions and 47 deletions
  1. douyin_hourly_ranklist_commerce_scraper.py (+49 -39)
  2. douyin_live_lottery_scraper.py (+7 -8)

+ 49 - 39
douyin_hourly_ranklist_commerce_scraper.py

81
     query = {
81
     query = {
82
         "style" : "3",
82
         "style" : "3",
83
         "hour_info" : "0",
83
         "hour_info" : "0",
84
-        "room_id" : room_id,
84
+        "room_id" : str(room_id),
85
         "rank_type" : "31",
85
         "rank_type" : "31",
86
         "sec_anchor_id" : sec_anchor_id,
86
         "sec_anchor_id" : sec_anchor_id,
87
         "webcast_sdk_version" : "1710",
87
         "webcast_sdk_version" : "1710",
95
         "dpi" : "480",
95
         "dpi" : "480",
96
         "app_name" : "aweme",
96
         "app_name" : "aweme",
97
         "version_name" : "13.0.0",
97
         "version_name" : "13.0.0",
98
-        "ts" : ts,
98
+        "ts" : str(ts),
99
         "cpu_support64" : "true",
99
         "cpu_support64" : "true",
100
         "storage_type" : "0",
100
         "storage_type" : "0",
101
         "app_type" : "normal",
101
         "app_type" : "normal",
106
         "channel" : "tengxun_new",
106
         "channel" : "tengxun_new",
107
         "_rticket" : rticket,
107
         "_rticket" : rticket,
108
         "device_platform" : "android",
108
         "device_platform" : "android",
109
-        "iid" : iid,
109
+        "iid" : str(iid),
110
         "version_code" : "130000",
110
         "version_code" : "130000",
111
         "mac_address" : mc,
111
         "mac_address" : mc,
112
         "cdid" : "81542dc6-2aca-4ff6-ac58-d94179e9d3e6",
112
         "cdid" : "81542dc6-2aca-4ff6-ac58-d94179e9d3e6",
113
-        "openudid" : openudid,
114
-        "device_id" : device_id,
113
+        "openudid" : str(openudid),
114
+        "device_id" : str(device_id),
115
         "resolution" : "1080*1800",
115
         "resolution" : "1080*1800",
116
         "os_version" : "6.0",
116
         "os_version" : "6.0",
117
         "language" : "zh",
117
         "language" : "zh",
126
 
126
 
127
     xGorgon = X_Gorgon(query_params, body)
127
     xGorgon = X_Gorgon(query_params, body)
128
 
128
 
129
-    userAgent = 'okhttp/3.' + str(random.randint(0, 10)) + '.' + str(random.randint(0, 10)) + '.' + str(
130
-        random.randint(1, 10))
129
+    userAgent = f'com.ss.android.ugc.aweme/1300000 (Linux; U; Android 5.1.1; zh_CN; {device_type}; Build/LMY47V; Cronet/58.0.2991.0)'
131
 
130
 
132
     headers = {
131
     headers = {
133
         'Host': domain,
132
         'Host': domain,
141
         'sdk-version' : '2',
140
         'sdk-version' : '2',
142
         'x-ss-dp' : '1128',
141
         'x-ss-dp' : '1128',
143
         'x-tt-trace-id' : trace_id,
142
         'x-tt-trace-id' : trace_id,
143
+        'cookie' : cookie
144
     }
144
     }
145
 
145
 
146
     retry = 0
146
     retry = 0
147
     response_json = None
147
     response_json = None
148
 
148
 
149
     while True:
149
     while True:
150
-        if retry > 50:
150
+        if retry > 0:
151
             break
151
             break
152
 
152
 
153
         retry += 1
153
         retry += 1
231
 def scrape(room_id,sec_anchor_id,anchor_id):
231
 def scrape(room_id,sec_anchor_id,anchor_id):
232
     rds_list = RdsDouyinHourlyRankList()
232
     rds_list = RdsDouyinHourlyRankList()
233
 
233
 
234
-    key = 'DOUYIN_SCRAPE_DID_IID_TTREQ_1221'
235
-    rdid = DbRedis.connect().rpop(key)
236
-    if rdid:
237
-        result = rdid.split('@@@')
238
-    else:
239
-        result = []
240
-        return None
234
+    start_time = int(time.time())
241
 
235
 
242
-    DbRedis.connect().lpush(key, rdid)
236
+    while True:
243
 
237
 
244
-    try:
245
-        # 带货小时榜
246
-        commerce_response_json = get_commerce_rank_list_data(room_id=room_id, sec_anchor_id=sec_anchor_id, anchor_id=anchor_id, result=result)
247
-        
248
-        if commerce_response_json is None:
238
+        current_time = int(time.time())
239
+        if current_time - start_time >= 9*60:
249
             print(
240
             print(
250
-                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 
251
-                + ' 带货小时榜数据获取失败!响应数据为空!' 
252
-            )
241
+                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 
242
+                    + ' 带货小时榜数据获取失败!响应数据为空!' 
243
+                )
253
             sys.exit(0)
244
             sys.exit(0)
245
+
246
+        key = 'DOUYIN_SCRAPE_DID_IID_TTREQ_1221'
247
+        rdid = DbRedis.connect().rpop(key)
248
+        if rdid:
249
+            result = rdid.split('@@@')
254
         else:
250
         else:
255
-            data = json.dumps({
256
-                "data": commerce_response_json.get('data'),
257
-                "extra": {}
258
-            })
259
-            rds_list.push_commerce_data_list(data)
260
-
261
-    except Exception as e:
262
-        print(
263
-            time.strftime("%H:%M:%S", time.localtime()) 
264
-            + ' ' 
265
-            + '数据异常:' 
266
-            + str(e)
267
-        )
268
-
269
-    sys.exit(0)
251
+            result = []
252
+            return None
253
+
254
+        DbRedis.connect().lpush(key, rdid)
255
+
256
+        try:
257
+            # 带货小时榜
258
+            commerce_response_json = get_commerce_rank_list_data(room_id=room_id, sec_anchor_id=sec_anchor_id, anchor_id=anchor_id, result=result)
259
+            
260
+            if commerce_response_json is None:
261
+                print(
262
+                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 
263
+                    + ' 带货小时榜数据获取失败!响应数据为空!' 
264
+                )
265
+            else:
266
+                data = json.dumps({
267
+                    "data": commerce_response_json.get('data'),
268
+                    "extra": {}
269
+                })
270
+                rds_list.push_commerce_data_list(data)
271
+                sys.exit(0)
272
+
273
+        except Exception as e:
274
+            print(
275
+                time.strftime("%H:%M:%S", time.localtime()) 
276
+                + ' ' 
277
+                + '数据异常:' 
278
+                + str(e)
279
+            )
270
 
280
 
271
 if __name__ == "__main__":
281
 if __name__ == "__main__":
272
     print("主方法开始执行")
282
     print("主方法开始执行")

+ 7 - 8
douyin_live_lottery_scraper.py

78
     device_id, iid, udid, openudid, cookie, V1, V2, device_type, device_brand = result[0], result[1], result[3], result[2], result[4], result[8], result[9], result[10], result[11]
78
     device_id, iid, udid, openudid, cookie, V1, V2, device_type, device_brand = result[0], result[1], result[3], result[2], result[4], result[8], result[9], result[10], result[11]
79
     
79
     
80
     query = {
80
     query = {
81
-        "room_id" : room_id,
81
+        "room_id" : str(room_id),
82
         "webcast_sdk_version" : "1690",
82
         "webcast_sdk_version" : "1690",
83
         "webcast_language" : "zh",
83
         "webcast_language" : "zh",
84
         "webcast_locale" :"zh_CN",
84
         "webcast_locale" :"zh_CN",
85
         "webcast_gps_access" : "1",
85
         "webcast_gps_access" : "1",
86
         "os_api" : "23",
86
         "os_api" : "23",
87
-        "device_type" : "SM-G9200",
87
+        "device_type" : device_type,
88
         "ssmix" : "a",
88
         "ssmix" : "a",
89
         "manifest_version_code" : "120801",
89
         "manifest_version_code" : "120801",
90
         "dpi" : "640",
90
         "dpi" : "640",
102
         "_rticket" : rticket,
102
         "_rticket" : rticket,
103
         # "_rticket" : "1629688012123",
103
         # "_rticket" : "1629688012123",
104
         "device_platform" : "android",
104
         "device_platform" : "android",
105
-        "iid" : iid,
105
+        "iid" : str(iid),
106
         "version_code" : "120800",
106
         "version_code" : "120800",
107
         "mac_address" : mc,
107
         "mac_address" : mc,
108
         #"mac_address" : "FC%3ADB%3AB3%3A56%3ABD%3AFD",
108
         #"mac_address" : "FC%3ADB%3AB3%3A56%3ABD%3AFD",
109
         "cdid" : "6c96979e-c729-419c-9516-3a85a7338d0c",
109
         "cdid" : "6c96979e-c729-419c-9516-3a85a7338d0c",
110
-        "openudid" : openudid,
111
-        "device_id" : device_id,
110
+        "openudid" : str(openudid),
111
+        "device_id" : str(device_id),
112
         "resolution" : "1440*2560",
112
         "resolution" : "1440*2560",
113
         "os_version" : "6.0.1",
113
         "os_version" : "6.0.1",
114
         "language" : "zh",
114
         "language" : "zh",
115
-        "device_brand":"samsung",
115
+        "device_brand":device_brand,
116
         "aid" : "1128"
116
         "aid" : "1128"
117
     }
117
     }
118
 
118
 
123
 
123
 
124
     xGorgon = X_Gorgon(query_params, body)
124
     xGorgon = X_Gorgon(query_params, body)
125
 
125
 
126
-    userAgent = 'okhttp/3.' + str(random.randint(0, 10)) + '.' + str(random.randint(0, 10)) + '.' + str(
127
-        random.randint(1, 10))
126
+    userAgent = userAgent = f'com.ss.android.ugc.aweme/1208000 (Linux; U; Android 5.1.1; zh_CN; {device_type}; Build/LMY47V; Cronet/58.0.2991.0)'
128
 
127
 
129
     headers = {
128
     headers = {
130
         'Host': domain,
129
         'Host': domain,