店播爬取Python脚本

live_promotions.py (3.4 KB)
  1. import requests
  2. import time
  3. import random
  4. from libs.proxy import Proxy
  5. from log.print_log import PrintLog
  6. class LivePromotions:
  7. @staticmethod
  8. def get_data(uid, room_id):
  9. # if random.randint(0, 2) > 0:
  10. # uid = '1'
  11. # url = 'https://lianmengapi.snssdk.com/live/promotions/?author_id=' + uid + '&room_id=' + room_id + '&aid=1128'
  12. url = 'http://lianmengapi.snssdk.com/live/livePromotions/?author_id=' + uid + '&room_id=' + room_id + '&os_api=22&device_type=vivo%20X20&device_platform=android&ssmix=a&manifest_version_code=730&dpi=320&version_code=730&app_name=aweme&version_name=7.3.0&resolution=900*1600&language=zh&device_brand=vivo%20&app_type=normal&ac=wifi&update_version_code=7302&aid=1128&channel=xiaomi&mcc_mnc=46007'
  13. headers = {
  14. 'Host': 'lianmengapi.snssdk.com',
  15. 'Connection': 'keep-alive',
  16. 'Cache-Control': 'max-age=0',
  17. 'Upgrade-Insecure-Requests': '1',
  18. 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3908.2 Mobile Safari/537.36',
  19. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
  20. 'Sec-Fetch-Site': 'none',
  21. 'Sec-Fetch-Mode': 'navigate',
  22. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'
  23. }
  24. retry = 0
  25. response = None
  26. while True:
  27. if retry > 10:
  28. break
  29. retry += 1
  30. proxy = Proxy.get()
  31. proxies = {
  32. "http": "http://" + proxy,
  33. "https": "http://" + proxy
  34. }
  35. try:
  36. response = requests.get(
  37. url,
  38. headers=headers,
  39. proxies=proxies,
  40. timeout=10
  41. )
  42. if (response.status_code == 200) and (response.text is not None) and (response.text != ''):
  43. break
  44. else:
  45. PrintLog.print(
  46. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '爬取http连接失败!' + str(response.status_code) + '\n'
  47. + Proxy.proxy_info + '\n'
  48. + uid + ' ' + room_id + '\n'
  49. )
  50. time.sleep(1)
  51. except requests.exceptions.ProxyError as e:
  52. PrintLog.print(
  53. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '代理过期!' + str(e) + '\n'
  54. + uid + '\n'
  55. + Proxy.proxy_info
  56. )
  57. Proxy.del_proxy(proxy)
  58. pass
  59. except requests.exceptions.ConnectTimeout as e:
  60. PrintLog.print(
  61. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ConnectTimeout!' + str(e) + '\n'
  62. + uid + '\n'
  63. + Proxy.proxy_info
  64. )
  65. Proxy.del_proxy(proxy)
  66. pass
  67. except Exception as e:
  68. PrintLog.print(
  69. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '请求抛出异常!' + str(e) + '\n'
  70. + uid + ' ' + room_id + '\n'
  71. + Proxy.proxy_info
  72. )
  73. pass
  74. return response