shensong00 před 3 lety
rodič
revize
c83658f642
3 změněné soubory, 24 přidání a 6 odebrání
  1. 8 2
      douyin_video_scraper_web.py
  2. 10 1
      rds_model/rds_user_video_list.py
  3. 6 3
      web_dy.py

+ 8 - 2
douyin_video_scraper_web.py

@@ -40,6 +40,11 @@ if __name__ == '__main__':
40 40
         
41 41
         rds = RdsUserVideoList()
42 42
 
43
+        ck = rds.get_cookie()
44
+        if ck is None:
45
+            ck = webdy.get_ck(proxy=proxies)
46
+            rds.set_cookie(ck)
47
+
43 48
         for index in range(1000):
44 49
             try:
45 50
                 # ukey = 'BrandLiveData.DouyinUserVideo'
@@ -47,7 +52,7 @@ if __name__ == '__main__':
47 52
                 users = rds.get_request_param()
48 53
                 
49 54
                 if users is None:
50
-                    time.sleep(10)
55
+                    time.sleep(1)
51 56
                     continue
52 57
                 user = json.loads(users)
53 58
                 user_id = user.get('uid')
@@ -60,7 +65,7 @@ if __name__ == '__main__':
60 65
                 })
61 66
                 
62 67
                 users = json.dumps(user)
63
-                videos = webdy.get_user_videos(sec_user_id=sec_user_id,max_cursor=0,count=20,proxy=proxies)
68
+                videos = webdy.get_user_videos(sec_user_id=sec_user_id,max_cursor=0,count=20,proxy=proxies,cookie=ck)
64 69
                 # skey = 'BrandLiveData.DouyinUserVideoScore'
65 70
                 # data_score = DbRedis.connect().rpop(skey)
66 71
                 
@@ -147,3 +152,4 @@ if __name__ == '__main__':
147 152
                     + "\n"
148 153
                 )
149 154
                 break
155
+

+ 10 - 1
rds_model/rds_user_video_list.py

@@ -33,4 +33,13 @@ class RdsUserVideoList:
33 33
     # 获取现有记录
34 34
     def get_score(self):
35 35
         key = 'BrandLiveData.DouyinUserVideoScore'
36
-        return self.redis.get(key)
36
+        return self.redis.get(key)
37
+    
38
+    def get_cookie(self):
39
+        key = 'BrandLiveData.DouyinUserVideoWebCookie'
40
+        return self.redis.get(key)
41
+
42
+    def set_cookie(self, data):
43
+        key = 'BrandLiveData.DouyinUserVideoWebCookie'
44
+        self.redis.set(key, data)
45
+        self.redis.expire(key, 7200)

+ 6 - 3
web_dy.py

@@ -18,7 +18,7 @@ class WebDouYin:
18 18
         
19 19
         # print('_signature',d)
20 20
         return d
21
-    def get_ck(self):
21
+    def get_ck(self, proxy=None):
22 22
         requests.packages.urllib3.disable_warnings()
23 23
         headers = {
24 24
             'authority': 'www.douyin.com',
@@ -31,7 +31,7 @@ class WebDouYin:
31 31
             'upgrade-insecure-requests': '1',
32 32
             'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
33 33
         }
34
-        res = requests.get('https://www.douyin.com/', headers=headers,verify=False)
34
+        res = requests.get('https://www.douyin.com/', headers=headers,verify=False, proxies=proxy)
35 35
         cookie_dict=res.cookies.get_dict()
36 36
         ck=''
37 37
         if cookie_dict:
@@ -65,9 +65,12 @@ class WebDouYin:
65 65
             json_data = "请求失败" + str(e)
66 66
             print(json_data)
67 67
         return json_data
68
-    def get_user_videos(self,sec_user_id,max_cursor=0,count=10,proxy=None):
68
+    def get_user_videos(self,sec_user_id,max_cursor=0,count=10,proxy=None,cookie=None):
69 69
         
70 70
         ua,ck=self.get_ua_ck('get_user_videos')
71
+
72
+        if cookie:
73
+            ck = cookie
71 74
         
72 75
         url='https://www.douyin.com/aweme/v1/web/aweme/post/?'
73 76
         param={