店播爬取Python脚本

live_promotions_scraper.py 2.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/python3
  2. # coding=utf-8
  3. # -*- coding: utf-8 -*-
  4. import time
  5. import json
  6. import threading
  7. import sys
  8. from rds_model.rds_live_promotions_request_list import RdsLivePromotionsRequestList
  9. from libs.live_promotions import LivePromotions
  10. from log.print_log import PrintLog
  11. start_time = time.time()
  12. def scrape(heat, request_data_str):
  13. rds_list = RdsLivePromotionsRequestList()
  14. request_data = json.loads(request_data_str)
  15. uid = request_data.get('uid')
  16. room_id = request_data.get('room_id')
  17. if (uid is None) or (room_id is None):
  18. PrintLog.print(
  19. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '请求数据数据异常!' + '\n'
  20. + request_data_str
  21. )
  22. sys.exit(0)
  23. room_id = str(room_id)
  24. uid = str(uid)
  25. PrintLog.print(time.strftime("%H:%M:%S", time.localtime()) + ' ' + str(heat) + ' ' + room_id + ' ' + uid)
  26. try:
  27. response = LivePromotions.get_data(uid, room_id)
  28. response_json = response.json()
  29. if response_json.get('promotions') is None:
  30. # rds_list.record_score(0)
  31. sys.exit(0)
  32. # 没有商品
  33. if len(response_json.get('promotions')) == 0:
  34. sys.exit(0)
  35. data = json.dumps({
  36. "data": response_json,
  37. "extra": {
  38. 'room_id': room_id,
  39. 'uid': uid,
  40. 'heat': heat,
  41. }
  42. })
  43. rds_list.record_score(1)
  44. rds_list.push_data_list(data)
  45. except Exception as e:
  46. rds_list.record_score(0)
  47. PrintLog.print(
  48. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '抛出异常!' + str(e) + '\n'
  49. + request_data_str
  50. )
  51. sys.exit(0)
  52. if __name__ == "__main__":
  53. print("主方法开始执行")
  54. heat = int(sys.argv[1])
  55. # 并行线程数
  56. threading_count = int(sys.argv[2])
  57. rds = RdsLivePromotionsRequestList()
  58. print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 爬取直播商品队列长度为:' + str(heat) + ' ' + str(rds.get_len(heat)))
  59. while True:
  60. sys.stdout.flush()
  61. # 减去主线程
  62. active_count = threading.active_count() - 1
  63. increment = threading_count - active_count
  64. while increment > 0:
  65. sys.stdout.flush()
  66. request_data_str = rds.get_request_params(heat)
  67. if request_data_str is None:
  68. time.sleep(0.1)
  69. break
  70. task = threading.Thread(target=scrape, args=(heat, request_data_str))
  71. task.start() # 准备就绪,等待cpu执行
  72. increment = increment - 1
  73. current_time = time.time()
  74. if current_time - start_time > 1800:
  75. print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 主方法执行终止')
  76. sys.exit(0)
  77. time.sleep(0.01)