店播爬取Python脚本

live_promotions_scraper.py 2.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. #!/usr/bin/python3
  2. # coding=utf-8
  3. # -*- coding: utf-8 -*-
  4. import time
  5. import json
  6. import threading
  7. import sys
  8. from rds_model.rds_live_promotions_request_list import RdsLivePromotionsRequestList
  9. from libs.live_promotions import LivePromotions
  10. from log.print_log import PrintLog
  11. start_time = time.time()
  12. def scrape(heat):
  13. while True:
  14. rds = RdsLivePromotionsRequestList()
  15. request_data_str = rds.get_request_params(heat)
  16. if request_data_str is None:
  17. time.sleep(1)
  18. continue
  19. rds_list = RdsLivePromotionsRequestList()
  20. request_data = json.loads(request_data_str)
  21. uid = request_data.get('uid')
  22. room_id = request_data.get('room_id')
  23. if (uid is None) or (room_id is None):
  24. PrintLog.print(
  25. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '请求数据数据异常!' + '\n'
  26. + request_data_str
  27. )
  28. continue
  29. room_id = str(room_id)
  30. uid = str(uid)
  31. PrintLog.print(time.strftime("%H:%M:%S", time.localtime()) + ' ' + str(heat) + ' ' + room_id + ' ' + uid)
  32. try:
  33. response = LivePromotions.get_data(uid, room_id)
  34. response_json = response.json()
  35. if response_json.get('promotions') is None:
  36. # rds_list.record_score(0)
  37. continue
  38. # 没有商品
  39. if len(response_json.get('promotions')) == 0:
  40. continue
  41. data = json.dumps({
  42. "data": response_json,
  43. "extra": {
  44. 'room_id': room_id,
  45. 'uid': uid,
  46. 'heat': heat,
  47. }
  48. })
  49. rds_list.record_score(1)
  50. rds_list.push_data_list(data)
  51. except Exception as e:
  52. rds_list.record_score(0)
  53. PrintLog.print(
  54. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '抛出异常!' + str(e) + '\n'
  55. + request_data_str
  56. )
  57. if __name__ == "__main__":
  58. print("主方法开始执行")
  59. heat = int(sys.argv[1])
  60. # 并行线程数
  61. threading_count = int(sys.argv[2])
  62. rds = RdsLivePromotionsRequestList()
  63. print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 爬取直播商品队列长度为:' + str(heat) + ' ' + str(rds.get_len(heat)))
  64. while True:
  65. sys.stdout.flush()
  66. # 减去主线程
  67. active_count = threading.active_count() - 1
  68. increment = threading_count - active_count
  69. if increment > 0:
  70. sys.stdout.flush()
  71. task = threading.Thread(target=scrape, args=(heat,))
  72. print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 启动线程' + str(increment))
  73. task.start() # 准备就绪,等待cpu执行
  74. current_time = time.time()
  75. time.sleep(0.01)