# 店播爬取Python脚本 (shop live-stream crawler, Python script)
# room_info.py (4.6KB)
import random
import time
import urllib
import urllib.parse

import requests

from libs.Xg04 import X_Gorgon
from libs.proxy import Proxy
from log.print_log import PrintLog
  8. class RoomInfo:
  9. @staticmethod
  10. def get_data(room_id):
  11. domain = random.choice([
  12. 'webcast.amemv.com',
  13. 'webcast-hl.amemv.com',
  14. 'webcast3.amemv.com',
  15. 'webcast3-hl.amemv.com',
  16. 'webcast3-normal-c-lf.amemv.com'
  17. ])
  18. app_id = random.choice([
  19. '1128',
  20. '2329'
  21. ])
  22. # https://webcast-hl.amemv.com/webcast/room/reflow/info/?room_id=6844063184001698568&type_id=0&user_id=81374519744&live_id=1&app_id=1128
  23. # url = 'http://webcast-hl.amemv.com/webcast/room/reflow/info/?room_id=' + room_id + '&type_id=0&user_id=1&live_id=' + live_id + '&app_id=1128'
  24. # url = 'http://'+ domain +'.amemv.com/webcast/room/reflow/info/?room_id=' + room_id + '&type_id=0&user_id=1&live_id=1&app_id='+ app_id +'&aid=' + app_id
  25. url = 'http://' + domain + '/webcast/room/reflow/info/?'
  26. rticket = str(int(time.time() * 1000))
  27. query = {
  28. "room_id": room_id,
  29. "type_id": "2", # 0,1
  30. "user_id": "104255897823",
  31. "live_id": "1",
  32. "app_id": app_id
  33. }
  34. query_params = urllib.parse.urlencode(query)
  35. url = url + query_params
  36. body = ''
  37. xGorgon = X_Gorgon(query_params, body)
  38. userAgent = 'okhttp/3.' + str(random.randint(0, 10)) + '.' + str(random.randint(0, 10)) + '.' + str(
  39. random.randint(1, 10))
  40. headers = {
  41. 'Host': domain,
  42. 'Connection': 'keep-alive',
  43. 'Cache-Control': 'max-age=0',
  44. 'Upgrade-Insecure-Requests': '1',
  45. 'User-Agent': userAgent,
  46. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
  47. "X-SS-REQ-TICKET": rticket,
  48. "X-Gorgon": xGorgon.get('X-Gorgon'),
  49. "X-Khronos": xGorgon.get('X-Khronos'),
  50. }
  51. retry = 0
  52. response_json = None
  53. while True:
  54. if retry > 10:
  55. break
  56. retry += 1
  57. proxy = Proxy.get()
  58. proxies = {
  59. "http": "http://" + proxy,
  60. "https": "http://" + proxy
  61. }
  62. try:
  63. response = requests.get(
  64. url,
  65. headers=headers,
  66. proxies=proxies,
  67. timeout=8
  68. )
  69. if (response.status_code == 200) and (response.text is not None) and (response.text != ''):
  70. response_json = response.json()
  71. if (response_json.get('data') is not None) and (response_json.get('data').get('room') is not None):
  72. break
  73. else:
  74. PrintLog.print(
  75. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '数据获取失败!' + '\n'
  76. + room_id + '\n'
  77. + response.text
  78. + Proxy.proxy_info
  79. )
  80. else:
  81. PrintLog.print(
  82. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '爬取http连接失败!' + str(
  83. response.status_code) + '\n'
  84. + Proxy.proxy_info + '\n'
  85. + room_id + '\n'
  86. )
  87. time.sleep(1)
  88. except requests.exceptions.ProxyError as e:
  89. PrintLog.print(
  90. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '代理过期!' + str(e) + '\n'
  91. + room_id + '\n'
  92. + Proxy.proxy_info
  93. )
  94. Proxy.del_proxy(proxy)
  95. pass
  96. except requests.exceptions.ConnectTimeout as e:
  97. PrintLog.print(
  98. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ConnectTimeout!' + str(e) + '\n'
  99. + room_id + '\n'
  100. + Proxy.proxy_info
  101. )
  102. Proxy.del_proxy(proxy)
  103. pass
  104. except Exception as e:
  105. PrintLog.print(
  106. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '请求抛出异常!' + str(e) + '\n'
  107. + room_id + '\n'
  108. + Proxy.proxy_info
  109. )
  110. pass
  111. return response_json