2 Commits dbbc15e954 ... 96c0c4cd56

Auteur SHA1 Bericht Datum
  sunhao 96c0c4cd56 Merge branch 'master' of http://101.200.220.49:8001/houxiaohua/shop_live_scraper 3 jaren geleden
  sunhao f3f25bf19c 优化脚本显现 3 jaren geleden
2 gewijzigde bestanden met toevoegingen van 134 en 136 verwijderingen
  1. 85 86
      live_promotion_detail_v1_sale_scraper.py
  2. 49 50
      live_promotions_scraper.py

+ 85 - 86
live_promotion_detail_v1_sale_scraper.py

@@ -15,99 +15,105 @@ from log.print_log import PrintLog
15 15
 
16 16
 start_time = time.time()
17 17
 
18
-def scrape(request_data, heat):
19
-    rds_list = RdsLiveCommodityDetailRequestList()
20
-    request_split = request_data.split('@')
18
+def scrape(heat):
19
+    while True:
20
+        rds = RdsLiveCommodityDetailRequestList()
21
+        request_data = rds.get_request_params(heat)
22
+        if request_data is None:
23
+            time.sleep(0.1)
24
+            break
21 25
 
22
-    if len(request_split) < 5:
23
-        print('数据格式不对' + request_data)
24
-        sys.exit(0)
26
+        rds_list = RdsLiveCommodityDetailRequestList()
27
+        request_split = request_data.split('@')
25 28
 
26
-    promotion_id = request_split[0]
27
-    product_id = request_split[1]
28
-    uid = request_split[2]
29
-    room_id = request_split[3]
30
-    exec_time = int(request_split[4])
29
+        if len(request_split) < 5:
30
+            print('数据格式不对' + request_data)
31
+            sys.exit(0)
31 32
 
32
-    if int(time.time()) < exec_time:
33
-        sp_ns = exec_time - int(time.time())
34
-        time.sleep(sp_ns)
33
+        promotion_id = request_split[0]
34
+        product_id = request_split[1]
35
+        uid = request_split[2]
36
+        room_id = request_split[3]
37
+        exec_time = int(request_split[4])
35 38
 
39
+        if int(time.time()) < exec_time:
40
+            sp_ns = exec_time - int(time.time())
41
+            time.sleep(sp_ns)
36 42
 
37
-    PrintLog.print(
38
-        time.strftime("%H:%M:%S", time.localtime()) + ' '
39
-           + time.strftime("%H:%M:%S", time.localtime(exec_time)) + ' '
40
-           + str(heat) + ' ' + promotion_id
41
-    )
42 43
 
43
-    try:
44
+        PrintLog.print(
45
+            time.strftime("%H:%M:%S", time.localtime()) + ' '
46
+               + time.strftime("%H:%M:%S", time.localtime(exec_time)) + ' '
47
+               + str(heat) + ' ' + promotion_id
48
+        )
44 49
 
45
-        commodity_detail = LiveCommodityDetailV1.get_data(product_id)
50
+        try:
46 51
 
47
-        if (commodity_detail is None) or (commodity_detail == '') or ('real_sell_num' in commodity_detail)==False :
48
-            rds_list.record_v1_score(0)
49
-            PrintLog.print(
50
-                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 数据为空 ' + promotion_id
51
-            )
52
+            commodity_detail = LiveCommodityDetailV1.get_data(product_id)
52 53
 
53
-            if heat==0:
54
-                if random.randint(0, 100) > 0:
55
-                    rds_list.add_request_params(request_data, heat)
56
-                    print('首次失败重新插入 ' + request_data)
57
-                    sys.exit(0)
54
+            if (commodity_detail is None) or (commodity_detail == '') or ('real_sell_num' in commodity_detail)==False :
55
+                rds_list.record_v1_score(0)
56
+                PrintLog.print(
57
+                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 数据为空 ' + promotion_id
58
+                )
58 59
 
59
-            if heat>0:
60
-                if random.randint(0, 9) > 0:
61
-                    rds_list.add_request_params(request_data, heat)
62
-                    print(str(heat) + '首次失败重新插入 ' + request_data)
63
-                    sys.exit(0)
60
+                if heat==0:
61
+                    if random.randint(0, 100) > 0:
62
+                        rds_list.add_request_params(request_data, heat)
63
+                        print('首次失败重新插入 ' + request_data)
64
+                        sys.exit(0)
64 65
 
65
-        if heat < 2:
66
-            goodsDb = MysqlDyLiveCommodity()
67
-            pix = str(uid)[-1]
68
-            goodsInfo = goodsDb.get_goods_info(room_id, product_id, pix)
66
+                if heat>0:
67
+                    if random.randint(0, 9) > 0:
68
+                        rds_list.add_request_params(request_data, heat)
69
+                        print(str(heat) + '非首次失败重新插入 ' + request_data)
70
+                        sys.exit(0)
69 71
 
70
-            dataid,sold_out_at,revise_price,created_at = goodsInfo
72
+            if heat < 2:
73
+                goodsDb = MysqlDyLiveCommodity()
74
+                pix = str(uid)[-1]
75
+                goodsInfo = goodsDb.get_goods_info(room_id, product_id, pix)
71 76
 
72
-            if revise_price is not None:
73
-                sys.exit(0)
77
+                dataid,sold_out_at,revise_price,created_at = goodsInfo
74 78
 
75
-            if sold_out_at == '0000-00-00 00:00:00' or sold_out_at == '1970-01-01 08:00:00':
76
-                add_t = 300
77
-                news_time = int(time.time()) + add_t
78
-                next_rds = promotion_id + '@' + product_id + '@' + uid + '@' + room_id + '@' + str(news_time) 
79
-                rds_list.push_middle_list(next_rds)
80
-            else :
81
-                add_t = 1200
82
-                news_time = int(time.time()) + add_t
83
-                next_rds = promotion_id + '@' + product_id + '@' + uid + '@' + room_id + '@' + str(news_time) 
84
-                rds_list.push_last_list(next_rds)
85
-
86
-        if (commodity_detail is None) or (commodity_detail == '') or ('real_sell_num' in commodity_detail)==False:
87
-            sys.exit(0)
79
+                if revise_price is not None:
80
+                    sys.exit(0)
88 81
 
89
-        data = json.dumps({
90
-            "data": commodity_detail,
91
-            "extra": {
92
-                'room_id': room_id,
93
-                'promotion_id': promotion_id,
94
-                'product_id': product_id,
95
-                'uid': uid,
96
-                'heat': heat
97
-            }
98
-        })
99
-
100
-        rds_list.record_v1_score(1)
101
-        rds_list.push_v1_data_list(data)
102
-    except Exception as e:
103
-        rds_list.record_v1_score(0)
104
-        rds_list.add_request_params(request_data, heat)
105
-        PrintLog.print(
106
-            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '抛出异常!' + str(e) + '\n'
107
-            + request_data
108
-        )
82
+                if sold_out_at == '0000-00-00 00:00:00' or sold_out_at == '1970-01-01 08:00:00':
83
+                    add_t = 300
84
+                    news_time = int(time.time()) + add_t
85
+                    next_rds = promotion_id + '@' + product_id + '@' + uid + '@' + room_id + '@' + str(news_time) 
86
+                    rds_list.push_middle_list(next_rds)
87
+                else :
88
+                    add_t = 1200
89
+                    news_time = int(time.time()) + add_t
90
+                    next_rds = promotion_id + '@' + product_id + '@' + uid + '@' + room_id + '@' + str(news_time) 
91
+                    rds_list.push_last_list(next_rds)
92
+
93
+            if (commodity_detail is None) or (commodity_detail == '') or ('real_sell_num' in commodity_detail)==False:
94
+                sys.exit(0)
95
+
96
+            data = json.dumps({
97
+                "data": commodity_detail,
98
+                "extra": {
99
+                    'room_id': room_id,
100
+                    'promotion_id': promotion_id,
101
+                    'product_id': product_id,
102
+                    'uid': uid,
103
+                    'heat': heat
104
+                }
105
+            })
106
+
107
+            rds_list.record_v1_score(1)
108
+            rds_list.push_v1_data_list(data)
109
+        except Exception as e:
110
+            rds_list.record_v1_score(0)
111
+            rds_list.add_request_params(request_data, heat)
112
+            PrintLog.print(
113
+                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '抛出异常!' + str(e) + '\n'
114
+                + request_data
115
+            )
109 116
 
110
-    sys.exit(0)
111 117
 
112 118
 
113 119
 if __name__ == "__main__":
@@ -129,19 +135,12 @@ if __name__ == "__main__":
129 135
 
130 136
         increment = threading_count - active_count
131 137
 
132
-        while increment > 0:
138
+        if increment > 0:
133 139
 
134 140
             sys.stdout.flush()
135 141
 
136
-            request_data = rds.get_request_params(heat)
137
-
138
-            if request_data is None:
139
-                time.sleep(0.1)
140
-                break
141
-
142
-            task = threading.Thread(target=scrape, args=(request_data, heat))
142
+            task = threading.Thread(target=scrape, args=(heat,))  # args must be a tuple; (heat) is just heat and breaks target(*args)
143 143
             task.start()  # 准备就绪,等待cpu执行
144
-            increment = increment - 1
145 144
 
146 145
         current_time = time.time()
147 146
 

+ 49 - 50
live_promotions_scraper.py

@@ -14,58 +14,65 @@ from log.print_log import PrintLog
14 14
 start_time = time.time()
15 15
 
16 16
 
17
-def scrape(heat, request_data_str):
17
+def scrape(heat):
18 18
 
19
-    rds_list = RdsLivePromotionsRequestList()
19
+    while True:
20
+        rds = RdsLivePromotionsRequestList()
21
+        request_data_str = rds.get_request_params(heat)
20 22
 
21
-    request_data = json.loads(request_data_str)
22
-    uid = request_data.get('uid')
23
-    room_id = request_data.get('room_id')
23
+        if request_data_str is None:
24
+            time.sleep(0.1)
25
+            break
24 26
 
25
-    if (uid is None) or (room_id is None):
26
-        PrintLog.print(
27
-            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '请求数据数据异常!' + '\n'
28
-            + request_data_str
29
-        )
30
-        sys.exit(0)
27
+        rds_list = RdsLivePromotionsRequestList()
31 28
 
32
-    room_id = str(room_id)
33
-    uid = str(uid)
34
-    PrintLog.print(time.strftime("%H:%M:%S", time.localtime()) + ' ' + str(heat) + ' ' + room_id + ' ' + uid)
29
+        request_data = json.loads(request_data_str)
30
+        uid = request_data.get('uid')
31
+        room_id = request_data.get('room_id')
35 32
 
36
-    try:
33
+        if (uid is None) or (room_id is None):
34
+            PrintLog.print(
35
+                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '请求数据数据异常!' + '\n'
36
+                + request_data_str
37
+            )
38
+            sys.exit(0)
37 39
 
38
-        response = LivePromotions.get_data(uid, room_id)
40
+        room_id = str(room_id)
41
+        uid = str(uid)
42
+        PrintLog.print(time.strftime("%H:%M:%S", time.localtime()) + ' ' + str(heat) + ' ' + room_id + ' ' + uid)
39 43
 
40
-        response_json = response.json()
44
+        try:
41 45
 
42
-        if response_json.get('promotions') is None:
43
-            # rds_list.record_score(0)
44
-            sys.exit(0)
46
+            response = LivePromotions.get_data(uid, room_id)
45 47
 
46
-        # 没有商品
47
-        if len(response_json.get('promotions')) == 0:
48
-            sys.exit(0)
48
+            response_json = response.json()
49
+
50
+            if response_json.get('promotions') is None:
51
+                # rds_list.record_score(0)
52
+                sys.exit(0)
49 53
 
50
-        data = json.dumps({
51
-            "data": response_json,
52
-            "extra": {
53
-                'room_id': room_id,
54
-                'uid': uid,
55
-                'heat': heat,
56
-            }
57
-        })
54
+            # 没有商品
55
+            if len(response_json.get('promotions')) == 0:
56
+                sys.exit(0)
58 57
 
59
-        rds_list.record_score(1)
60
-        rds_list.push_data_list(data)
61
-    except Exception as e:
62
-        rds_list.record_score(0)
63
-        PrintLog.print(
64
-            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '抛出异常!' + str(e) + '\n'
65
-            + request_data_str
66
-        )
58
+            data = json.dumps({
59
+                "data": response_json,
60
+                "extra": {
61
+                    'room_id': room_id,
62
+                    'uid': uid,
63
+                    'heat': heat,
64
+                }
65
+            })
66
+
67
+            rds_list.record_score(1)
68
+            rds_list.push_data_list(data)
69
+        except Exception as e:
70
+            rds_list.record_score(0)
71
+            PrintLog.print(
72
+                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '抛出异常!' + str(e) + '\n'
73
+                + request_data_str
74
+            )
67 75
 
68
-    sys.exit(0)
69 76
 
70 77
 
71 78
 if __name__ == "__main__":
@@ -87,19 +94,11 @@ if __name__ == "__main__":
87 94
 
88 95
         increment = threading_count - active_count
89 96
 
90
-        while increment > 0:
97
+        if increment > 0:
91 98
 
92 99
             sys.stdout.flush()
93
-
94
-            request_data_str = rds.get_request_params(heat)
95
-
96
-            if request_data_str is None:
97
-                time.sleep(0.1)
98
-                break
99
-
100
-            task = threading.Thread(target=scrape, args=(heat, request_data_str))
100
+            task = threading.Thread(target=scrape, args=(heat,))  # args must be a tuple; (heat) is just heat and breaks target(*args)
101 101
             task.start()  # 准备就绪,等待cpu执行
102
-            increment = increment - 1
103 102
 
104 103
         current_time = time.time()
105 104