shensong00 před 3 roky
rodič
revize
f91e20eb3c
2 změnil soubory, kde provedl 35 přidání a 40 odebrání
  1. 26 31
      douyin_video_scraper_web.py
  2. 9 9
      web_dy.py

+ 26 - 31
douyin_video_scraper_web.py

@@ -11,7 +11,7 @@ from rds_model.rds_user_video_list import RdsUserVideoList
11 11
 if __name__ == '__main__':
12 12
     import warnings
13 13
     start_time = time.time()
14
-    print('1')
14
+    
15 15
     warnings.filterwarnings("ignore")
16 16
     while True:
17 17
 
@@ -31,20 +31,19 @@ if __name__ == '__main__':
31 31
             "http": "http://" + proxy,
32 32
             "https": "http://" + proxy
33 33
         }
34
-        print('2')
34
+        
35 35
         errorn = 0
36 36
         suc_f = 0
37 37
         webdy=WebDouYin()
38
-        print('3')
38
+        
39 39
         rds = RdsUserVideoList()
40
-        print('4')
41 40
 
42 41
         for index in range(1000):
43 42
             try:
44 43
                 # ukey = 'BrandLiveData.DouyinUserVideo'
45 44
                 # users = DbRedis.connect().rpop(ukey)
46 45
                 users = rds.get_request_param()
47
-                print('5')
46
+                
48 47
                 if users is None:
49 48
                     time.sleep(10)
50 49
                     continue
@@ -61,19 +60,18 @@ if __name__ == '__main__':
61 60
                 videos = webdy.get_user_videos(sec_user_id=sec_user_id,max_cursor=0,count=20,proxy=proxies)
62 61
                 # skey = 'BrandLiveData.DouyinUserVideoScore'
63 62
                 # data_score = DbRedis.connect().rpop(skey)
64
-                print('6')
63
+                
65 64
                 data_score = rds.get_score()
66 65
                 if data_score is None:
67
-                    data_score = {"total" : 1, "success" : 0, "fail" : 0}
66
+                    data_score = '1@@@0@@@0'
68 67
                 else:
69
-                    data_score = data_score.loads(data_score)
70
-                    total = data_score.get('total')
71
-                    data_score.update({
72
-                        'total' : total + 1
73
-                        })
68
+                    data_score = data_score.split('@@@')
69
+                    total, success, fail = int(data_score[0]), int(data_score[1]), int(data_score[2])
70
+                    total = total + 1
71
+                    data_score = total + '@@@' + success + '@@@' + fail
74 72
                 rds.record_score(data_score)
75 73
                 # DbRedis.connect().lpush(skey, data_score)
76
-                print('7')
74
+                
77 75
                 awemes = videos.get('aweme_list')
78 76
                 stime = time.strftime("%H:%M:%S", time.localtime())
79 77
                 if awemes:
@@ -88,13 +86,12 @@ if __name__ == '__main__':
88 86
                     # data_score = DbRedis.connect().rpop(skey)
89 87
                     data_score = rds.get_score()
90 88
                     if data_score is None:
91
-                        data_score = {"total" : 1, "success" : 1, "fail" : 0}
89
+                        data_score = '1@@@1@@@0'
92 90
                     else:
93
-                        data_score = data_score.loads(data_score)
94
-                        success = data_score.get('success')
95
-                        data_score.update({
96
-                            'success' : success + 1
97
-                            })
91
+                        data_score = data_score.split('@@@')
92
+                        total, success, fail = int(data_score[0]), int(data_score[1]), int(data_score[2])
93
+                        success = success + 1
94
+                        data_score = total + '@@@' + success + '@@@' + fail
98 95
                     # DbRedis.connect().lpush(skey, data_score)
99 96
                     rds.record_score(data_score)
100 97
 
@@ -103,13 +100,12 @@ if __name__ == '__main__':
103 100
                     # data_score = DbRedis.connect().rpop(skey)
104 101
                     data_score = rds.get_score()
105 102
                     if data_score is None:
106
-                        data_score = {"total" : 1, "success" : 0, "fail" : 1}
103
+                        data_score = '1@@@0@@@1'
107 104
                     else:
108
-                        data_score = data_score.loads(data_score)
109
-                        fail = data_score.get('fail')
110
-                        data_score.update({
111
-                            'fail' : fail + 1
112
-                            })
105
+                        data_score = data_score.split('@@@')
106
+                        total, success, fail = int(data_score[0]), int(data_score[1]), int(data_score[2])
107
+                        fail = fail + 1
108
+                        data_score = total + '@@@' + success + '@@@' + fail
113 109
                     # DbRedis.connect().lpush(skey, data_score) 
114 110
                     rds.record_score(data_score)
115 111
                     # PrintLog.print(stime+" 失败"+str(index)+' '+ str(sec_user_id))
@@ -126,13 +122,12 @@ if __name__ == '__main__':
126 122
                 # data_score = DbRedis.connect().rpop(skey)
127 123
                 data_score = rds.get_score()
128 124
                 if data_score is None:
129
-                    data_score = {"total" : 1, "success" : 0, "fail" : 1}
125
+                    data_score = '1@@@0@@@1'
130 126
                 else:
131
-                    data_score = data_score.loads(data_score)
132
-                    fail = data_score.get('fail')
133
-                    data_score.update({
134
-                        'fail' : fail + 1
135
-                        })
127
+                    data_score = data_score.split('@@@')
128
+                    total, success, fail = int(data_score[0]), int(data_score[1]), int(data_score[2])
129
+                    fail = fail + 1
130
+                    data_score = total + '@@@' + success + '@@@' + fail
136 131
                 # DbRedis.connect().lpush(skey, data_score) 
137 132
                 rds.record_score(data_score)
138 133
                 print(

+ 9 - 9
web_dy.py

@@ -8,15 +8,15 @@ class WebDouYin:
8 8
         self.proxies = {
9 9
         }
10 10
     def get_signature(self,url=None,method='_signature'):
11
-        print('5-3-1')
11
+        
12 12
         with open('signature.js', 'r', encoding='utf-8') as f:
13 13
             b = f.read()
14
-        print('5-3-2')
14
+        
15 15
         c = execjs.compile(b)
16
-        print('5-3-3')
16
+        
17 17
         # url=url.replace('%28','(').replace('%29',')').replace('%2C',',')
18 18
         d = c.call(method, url.replace('\n',''))
19
-        print('5-3-4')
19
+        
20 20
         # print('_signature',d)
21 21
         return d
22 22
     def get_ck(self):
@@ -63,9 +63,9 @@ class WebDouYin:
63 63
             print(json_data)
64 64
         return json_data
65 65
     def get_user_videos(self,sec_user_id,max_cursor=0,count=10,proxy=None):
66
-        print('5-1')
66
+        
67 67
         ua,ck=self.get_ua_ck('get_user_videos')
68
-        print('5-2')
68
+        
69 69
         url='https://www.douyin.com/aweme/v1/web/aweme/post/?'
70 70
         param={
71 71
             'device_platform': 'webapp',
@@ -88,10 +88,10 @@ class WebDouYin:
88 88
         }
89 89
 
90 90
         url = url + parse.urlencode(param)
91
-        print('5-3')
91
+        
92 92
         _signature = self.get_signature(url)
93 93
         url+='&_signature='+quote(_signature)
94
-        print('5-4')
94
+        
95 95
         headers = {
96 96
             "authority": "www.douyin.com",
97 97
             "method": "GET",
@@ -107,7 +107,7 @@ class WebDouYin:
107 107
         }
108 108
         if ck:headers['cookie']=ck
109 109
         json_data=self.response(url=url,headers=headers,proxy=proxy)
110
-        print('5-5')
110
+        
111 111
         return json_data
112 112
         # print(json_data)
113 113
     def get_user_info(self,sec_user_id,proxy=None):