@@ -0,0 +1,65 @@
+import time
+import threading
+import json
+
+
+from rds_model.rds_user_info_list import RdsUserInfoList
+from log.print_log import PrintLog
+from web_dy import WebDouYin
+
+
+def scrape():
+    rds_list = RdsUserInfoList()
+    web_dy = WebDouYin()
+    start_time = int(time.time())
+
+    while True:
+        try:
+            # Stop this worker after it has been running for 5 minutes.
+            if int(time.time()) - start_time > 5 * 60:
+                break
+            sec_uid = rds_list.get_wait_update_user()
+
+            if sec_uid is None:
+                time.sleep(0.1)
+                continue
+
+            sec_uid = str(sec_uid)
+
+            PrintLog.print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ' + sec_uid + ' start scraping user info')
+            response_json = web_dy.get_user_info(sec_uid)
+            if response_json is None:
+                rds_list.put_user_info(sec_uid)
+                PrintLog.print(
+                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' data fetch failed! Response data is empty!' + '\n'
+                    + sec_uid + '\n'
+                )
+                # Skip this uid: without this, response_json.get('user') below would raise on None.
+                continue
+
+            data = json.dumps({
+                "data": response_json.get('user'),
+                "extra": {
+                    'room_id': sec_uid
+                }
+            })
+
+            print('scrape succeeded')
+            rds_list.put_user_info(data)
+
+        except Exception as e:
+            print('scrape failed')
+            # Put the uid back on the wait queue so it can be retried.
+            rds_list.put_wait_update_user(sec_uid)
+            PrintLog.print(time.strftime("%H:%M:%S", time.localtime()) + ' ' + sec_uid + ' data error: ' + str(e))
+            time.sleep(0.1)
+
+
+if __name__ == "__main__":
+    print("main started")
+
+    rds = RdsUserInfoList()
+    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' starting, pending-update queue length: ' + str(rds.get_len()))
+    for i in range(1, 50):
+        task = threading.Thread(target=scrape, name=i)
+        task.start()  # Ready to run; waits for the CPU to schedule it.
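
Below is a minimal, hypothetical sketch of in-memory stand-ins that mirror only the methods the loop above calls (get_wait_update_user, put_wait_update_user, put_user_info, and get_len on RdsUserInfoList, and get_user_info on WebDouYin); the real classes are assumed to be Redis- and network-backed, so this is only meant for exercising scrape() locally, not as the actual implementation.

import queue


class FakeRdsUserInfoList:
    # Hypothetical in-memory stand-in for RdsUserInfoList (assumed Redis-backed in the real code).
    def __init__(self):
        self._wait = queue.Queue()   # sec_uids waiting to be scraped
        self._done = queue.Queue()   # serialized user-info payloads

    def put_wait_update_user(self, sec_uid):
        self._wait.put(sec_uid)

    def get_wait_update_user(self):
        try:
            return self._wait.get_nowait()
        except queue.Empty:
            return None

    def put_user_info(self, data):
        self._done.put(data)

    def get_len(self):
        return self._wait.qsize()


class FakeWebDouYin:
    # Hypothetical stand-in for WebDouYin that returns a canned payload instead of calling the web API.
    def get_user_info(self, sec_uid):
        return {'user': {'sec_uid': sec_uid, 'nickname': 'demo'}}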
|