shensong00 3 年之前
父節點
當前提交
6164d36705
共有 2 個文件被更改,包括 13 次插入和 3 次删除
  1. 5 1
      douyin_video_scraper_web.py
  2. 8 2
      web_dy.py

+ 5 - 1
douyin_video_scraper_web.py

@@ -43,7 +43,11 @@ if __name__ == '__main__':
43 43
         ck = rds.get_cookie()
44 44
         if ck is None:
45 45
             ck = webdy.get_ck(proxy=proxies)
46
-            rds.set_cookie(ck)
46
+            if ck:
47
+                rds.set_cookie(ck)
48
+            else:
49
+                print('获取cookie失败')
50
+                break
47 51
 
48 52
         for index in range(1000):
49 53
             try:

+ 8 - 2
web_dy.py

@@ -18,7 +18,7 @@ class WebDouYin:
18 18
         
19 19
         # print('_signature',d)
20 20
         return d
21
-    def get_ck(self, proxy=None):
21
+    def get_ck(self, proxy=None, retime=0):
22 22
         requests.packages.urllib3.disable_warnings()
23 23
         headers = {
24 24
             'authority': 'www.douyin.com',
@@ -31,13 +31,19 @@ class WebDouYin:
31 31
             'upgrade-insecure-requests': '1',
32 32
             'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
33 33
         }
34
-        res = requests.get('https://www.douyin.com/', headers=headers,verify=False, proxies=proxy)
34
+        res = requests.get('https://www.douyin.com/', headers=headers,verify=False, proxies=proxy,timeout=8)
35 35
         cookie_dict=res.cookies.get_dict()
36 36
         ck=''
37
+        if retime >= 2:
38
+            return None
39
+
37 40
         if cookie_dict:
38 41
             for k,v in cookie_dict.items():
39 42
                 ck+='%s=%s; '%(k,v)
40 43
             ck=ck[:-2]
44
+        else:
45
+            retime += 1
46
+            ck = self.get_ck(proxy=proxy, retime=retime)
41 47
         ck+='; passport_csrf_token='+str(uuid.uuid4()).replace('-','')
42 48
         # print(ck)
43 49
         return ck