
Add Douyin influencer user-info updater

commit 259d7f7ad3 by chenzhiyuan, 3 years ago
3 changed files with 103 additions and 6 deletions
  1. dy_userinfo_update.py (+68 -0)
  2. rds_model/rds_user_info_list.py (+27 -0)
  3. web_dy.py (+8 -6)

dy_userinfo_update.py (+68 -0)

@@ -0,0 +1,68 @@
+import time
+import threading
+import json
+
+
+from rds_model.rds_user_info_list import RdsUserInfoList
+from log.print_log import PrintLog
+from web_dy import WebDouYin
+
+
+def scrape():
+    rds_list = RdsUserInfoList()
+    web_dy = WebDouYin()
+    start_time = int(time.time())
+
+    while True:
+        sec_uid = None
+        try:
+            # Stop this worker after five minutes
+            if int(time.time()) - start_time > 5 * 60:
+                break
+            sec_uid = rds_list.get_wait_update_user()
+
+            if sec_uid is None:
+                time.sleep(0.1)
+                continue
+
+            sec_uid = str(sec_uid)
+
+            PrintLog.print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ' + sec_uid + ' start fetching user info')
+            response_json = web_dy.get_user_info(sec_uid)
+            if response_json is None:
+                # Re-queue the uid and skip this round; falling through
+                # would crash on response_json.get() below
+                rds_list.put_wait_update_user(sec_uid)
+                PrintLog.print(
+                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' fetch failed: response data is empty!' + '\n'
+                    + sec_uid + '\n'
+                )
+                continue
+
+            data = json.dumps({
+                "data": response_json.get('user'),
+                "extra": {
+                    'room_id': sec_uid
+                }
+            })
+
+            print('scrape succeeded')
+            rds_list.put_user_info(data)
+
+        except Exception as e:
+            print('scrape failed')
+            if sec_uid is not None:
+                rds_list.put_wait_update_user(sec_uid)
+            PrintLog.print(time.strftime("%H:%M:%S", time.localtime()) + ' ' + str(sec_uid) + ' data error: ' + str(e))
+        time.sleep(0.1)
+
+
+if __name__ == "__main__":
+    print("Main starting")
+
+    rds = RdsUserInfoList()
+    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' starting; pending user-info queue length: ' + str(rds.get_len()))
+    for i in range(1, 50):
+        # Spawn 49 worker threads
+        task = threading.Thread(target=scrape, name=str(i))
+        task.start()  # ready to run; waits to be scheduled
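A note on the queue handoff: scrape() drains BrandLiveData.DyWaitingUpdateUserInfoList and pushes its JSON results to BrandLiveData.DyUpdateUserInfoList. Below is a minimal sketch of seeding the waiting queue, assuming redis-py and a local Redis; the sec_uids are placeholders, and a plain redis.Redis connection stands in for the project's DbRedis.connect(), which this commit does not show.

import redis

# Assumed connection details; DbRedis.connect() is not shown in this commit.
r = redis.Redis(host='localhost', port=6379, db=0)

# Placeholder sec_uids, not real accounts.
for sec_uid in ['MS4wLjABAAAA_example_1', 'MS4wLjABAAAA_example_2']:
    r.rpush('BrandLiveData.DyWaitingUpdateUserInfoList', sec_uid)

# Queue length the updater's workers will drain
print(r.llen('BrandLiveData.DyWaitingUpdateUserInfoList'))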

rds_model/rds_user_info_list.py (+27 -0)

@@ -0,0 +1,27 @@
+from rds_model.db_redis import DbRedis
+
+
+class RdsUserInfoList:
+    def __init__(self):
+        self.redis = DbRedis.connect()
+
+    # Length of the queue of users awaiting an info update
+    def get_len(self):
+        key = 'BrandLiveData.DyWaitingUpdateUserInfoList'
+        return self.redis.llen(key)
+
+    # Pop one user from the head of the waiting queue
+    def get_wait_update_user(self):
+        key = 'BrandLiveData.DyWaitingUpdateUserInfoList'
+        return self.redis.lpop(key)
+
+    # Push a user (back) onto the head of the waiting queue
+    def put_wait_update_user(self, sec_uid):
+        key = 'BrandLiveData.DyWaitingUpdateUserInfoList'
+        return self.redis.lpush(key, sec_uid)
+
+    # Append fetched user info to the results queue
+    def put_user_info(self, data):
+        key = 'BrandLiveData.DyUpdateUserInfoList'
+        self.redis.rpush(key, data)
+
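For context, the results queue ends up holding JSON envelopes of the form {"data": <user>, "extra": {"room_id": <sec_uid>}} as built in dy_userinfo_update.py. A hedged sketch of a downstream consumer, again assuming redis-py in place of the DbRedis wrapper:

import json
import redis

# Assumed connection; decode_responses gives str instead of bytes.
r = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True)

raw = r.lpop('BrandLiveData.DyUpdateUserInfoList')
if raw is not None:
    payload = json.loads(raw)
    user = payload.get('data')             # the 'user' object from the Douyin response
    sec_uid = payload['extra']['room_id']  # the worker stores the sec_uid under 'room_id'
    print(sec_uid, user)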

web_dy.py (+8 -6)

@@ -1,4 +1,4 @@
-import requests,json,random,execjs,uuid
+import requests,json,random,execjs,uuid,os
 from urllib import parse
 from urllib.parse import quote
 from libs.proxy import Proxy
@@ -9,14 +9,14 @@ class WebDouYin:
         self.proxies = {
         }
     def get_signature(self,url=None,method='_signature'):
-        with open('/mnt/shop_live_scraper/signature.js', 'r', encoding='utf-8') as f:
+
+        with open(os.getcwd()+"/signature.js", 'r', encoding='utf-8') as f:
             b = f.read()
 
         c = execjs.compile(b)
 
         # url=url.replace('%28','(').replace('%29',')').replace('%2C',',')
         d = c.call(method, url.replace('\n',''))
-
         # print('_signature',d)
         return d
     def get_ck(self, proxy=None):
@@ -144,6 +144,7 @@ class WebDouYin:
             'browser_version':ua.replace('Mozilla/',''),
             'browser_online': 'true',
         }
+
         url = url + parse.urlencode(param)
         _signature = self.get_signature(url)
         url+='&_signature='+quote(_signature)
@@ -160,6 +161,7 @@ class WebDouYin:
             "user-agent":ua,
             "withcredentials": "true",
         }
+
         if ck:headers['cookie']=ck
         json_data=self.response(url=url,headers=headers,proxy=proxy)
         return json_data
@@ -167,8 +169,7 @@ if __name__ == '__main__':
     webdy=WebDouYin()
     # res = webdy.get_user_videos('MS4wLjABAAAAqLPgx-hHf27EqGEtRQ6YyuQQTmikB5CBO1jXy61yhWKujGd8KO5G8V2vdcLQJAym')
     # print(res)
-    # webdy.get_user_info('MS4wLjABAAAAC2euvL-0qMZyd80aNwZa-wX5KXuz_r7YVNHSBOogfVg')
-
+    info = webdy.get_user_info('MS4wLjABAAAAHYNHFpUR36AQSxdDpSFrI2uM4aDvSF-8vjtjNiLepD0')
     proxy = Proxy.get()
     print(proxy)
     proxies = {
@@ -176,4 +177,5 @@ if __name__ == '__main__':
         "https": "http://" + proxy
     }
 
-    ck = webdy.get_ck()
+#    ck = webdy.get_user_info("MS4wLjABAAAAC2euvL-0qMZyd80aNwZa-wX5KXuz_r7YVNHSBOogfVg")
+#    print(ck)
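One caveat on the signature.js change: os.getcwd() resolves against the directory the process is launched from, not the script's location, so the lookup still breaks when the scraper is started from elsewhere. A sketch of a cwd-independent alternative, assuming signature.js sits next to web_dy.py:

import os

# Resolve signature.js relative to this module, not the working directory.
SIGNATURE_JS = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'signature.js')

with open(SIGNATURE_JS, 'r', encoding='utf-8') as f:
    signature_source = f.read()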