店播爬取Python脚本

dy_user_live_info.py 2.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. import requests
  2. import time
  3. from libs.proxy import Proxy
  4. from log.print_log import PrintLog
  5. class DyUserLiveInfo:
  6. @staticmethod
  7. def get_data(uid):
  8. url = 'http://webcast-hl.amemv.com/webcast/room/reflow/info/?room_id=6798432314704530190&type_id=2&user_id=' + uid + '&live_id=1&app_id=1128'
  9. headers = {
  10. 'Host': 'webcast-hl.amemv.com',
  11. 'Connection': 'keep-alive',
  12. 'Cache-Control': 'max-age=0',
  13. 'Upgrade-Insecure-Requests': '1',
  14. 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3908.2 Mobile Safari/537.36',
  15. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
  16. 'Sec-Fetch-Site': 'none',
  17. 'Sec-Fetch-Mode': 'navigate',
  18. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'
  19. }
  20. retry = 0
  21. response = None
  22. while True:
  23. if retry > 10:
  24. break
  25. retry += 1
  26. proxy = Proxy.get()
  27. proxies = {
  28. "http": "http://" + proxy,
  29. "https": "http://" + proxy
  30. }
  31. try:
  32. response = requests.get(
  33. url,
  34. headers=headers,
  35. proxies=proxies,
  36. timeout=8
  37. )
  38. if (response.status_code == 200) and (response.text is not None) and (response.text != ''):
  39. break
  40. else:
  41. PrintLog.print(
  42. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 爬取http连接失败!' + str(response.status_code) + '\n'
  43. + Proxy.proxy_info + '\n'
  44. + uid + '\n'
  45. )
  46. time.sleep(1)
  47. except requests.exceptions.ProxyError as e:
  48. PrintLog.print(
  49. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 代理过期!' + str(e) + '\n'
  50. + uid + '\n'
  51. + Proxy.proxy_info
  52. )
  53. Proxy.del_proxy(proxy)
  54. pass
  55. except requests.exceptions.ConnectTimeout as e:
  56. PrintLog.print(
  57. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ConnectTimeout!' + str(e) + '\n'
  58. + uid + '\n'
  59. + Proxy.proxy_info
  60. )
  61. Proxy.del_proxy(proxy)
  62. pass
  63. except Exception as e:
  64. PrintLog.print(
  65. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 请求抛出异常!' + str(e) + '\n'
  66. + uid + '\n'
  67. + Proxy.proxy_info
  68. )
  69. pass
  70. return response.text