店播爬取Python脚本

barrage_token_server.py 6.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. #!/usr/bin/python3
  2. # coding=utf-8
  3. # -*- coding: utf-8 -*-
import json
import os
import random
import sys
import time

import redis
import requests
from flask import Flask, request

from libs.proxy import Proxy
  11. def get_websocket_info(live_stream_id, retry=0):
  12. param_json = {
  13. "operationName": "WebSocketInfoQuery",
  14. "variables": {
  15. "liveStreamId": live_stream_id
  16. },
  17. "query": "query WebSocketInfoQuery($liveStreamId: String) {\n webSocketInfo(liveStreamId: $liveStreamId) {\n token\n webSocketUrls\n __typename\n }\n}\n"
  18. }
  19. user_agents = {
  20. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3908.2 Safari/537.36',
  21. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
  22. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/82.0.4055.0 Safari/537.36',
  23. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
  24. }
  25. cookie_list = [
  26. 'web_8f5a71d99d502219d7ce1ca1ffec68cc; clientid=3; client_key=65890b29; Hm_lvt_86a27b7db2c5c0ae37fee4a8a35033ee=1595212029,1597053525; userId=1089887853; kuaishou.live.bfb1s=477cb0011daca84b36b3a4676857e5a1; didv=1603703368000; kpn=GAME_ZONE; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgATKQJmTG8mk6lH2pOJeVa-184Sq95qCmEB1lP_q3lItdj6a6gnYgCIcPkt1ZvnJlE3cAkBVIiK46xUEyJ1SZe2hatA7MGwchvIhNlvumZQWfwUHwDOKWRkpNOn-4c0wYnHmgituvkv2B2XJC1lnaLBJ57zZU6iKrtgtXyekfNrf37VW3n0cpGF8LfjxvfQibwYfxewf2uOhDQzKnCA-kLosaEhqtgBlt9k0TlHg5Y4Goo9D9kyIgPsuUj-GlXMBsywOViBgpl-tt7OVb051_AZAHST9ItzAoBTAB; kuaishou.live.web_ph=8f83f1ed0c54c288352d86b3a23fe996499e; userId=1089887853',
  27. 'kuaishou.live.bfb1s=7206d814e5c089a58c910ed8bf52ace5; clientid=3; client_key=65890b29; sid=83cf0fb9ee15fde6b6987781; did=web_c280cf49619a100d8305b6f095ed5348; userId=1509370340; didv=1600053596890; WEBLOGGER_HTTP_SEQ_ID=18928; WEBLOGGER_INCREAMENT_ID_KEY=19033; Hm_lvt_86a27b7db2c5c0ae37fee4a8a35033ee=1601391911; Hm_lpvt_86a27b7db2c5c0ae37fee4a8a35033ee=1602073621; kpn=GAME_ZONE; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgAcSZ7bO7s4CskEWV8oXNhqBeAW7qebb6Gv8_pmwLpeDLgzlIl3Nwgd2qal6895okBZxhkFFXQLIRvSJss1ALy5H2k0B9JvRZc9RNm4FzKmZ4eWPx2B3f-RLyiz6CkNpEsjcgpCQ90pQyBJgc9nWyo3vJRJS9fl53t2hFW29cvcHJktjSh4VRl-JrZ6WI3r3Et1kGxyh0wGqW3Q3xWzVMmIkaEsvpGUru20c-iIt7T0W8MQrXwiIgMbhYaFs2XBsAXpVKQoE6xmrFlpUw3VfiZaze7OM6mkEoBTAB; kuaishou.live.web_ph=6248010c3bbf0923d41c9d3c231b2903007e; userId=1509370340',
  28. 'did=web_03d225bf290f905c1d1a5b3810af3b07; didv=1607676125725; kuaishou.live.bfb1s=9b8f70844293bed778aade6e0a8f9942; clientid=3; client_key=65890b29; kpn=GAME_ZONE; Hm_lvt_86a27b7db2c5c0ae37fee4a8a35033ee=1607676148; Hm_lpvt_86a27b7db2c5c0ae37fee4a8a35033ee=1607676148; userId=2020851346; userId=2020851346; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgAR00HvTWTmSGTYkxB2Mny84aCe3Lu8qGgMyP7Bhp7Y8lHqkC4TPLNt8H71ZesLiX4hpu9eTzOCiCmzXp7bCHQxzYt6_POWrTVT9L-jgFL4ET3JmTqrjxudwbSZQP-icUJACHgXEpf9a_BXvPS1RrBnl82Gyg1LwLXxIo0uIdwhheHrAp3u4h4C6S5lqWvZ7aNX1nDk_5Jsyq8UqsuCXLNiMaEgCrAu8bFEUPixNgRvVq1Nb0ZSIgKyTMpEkPIRE47p3L_U0hfzZ2IIBI9YfiD5jI34L2zmAoBTAB; kuaishou.live.web_ph=2f965caadcde0572313c46a7f8de50843381'
  29. ]
  30. res = redis_connection.zrangebyscore(scrape_cookie_key, 0, '+inf', start=0, num=1, withscores=True)
  31. if len(res) == 0:
  32. cookie = 'kuaishou.live.bfb1s=9b8f70844293bed778aade6e0a8f9942; clientid=3; did=web_2ba8ea2ca07df85a9e193a75d443d128; client_key=65890b29; kpn=GAME_ZONE; userId=1509370340; userId=1509370340; kuaishou.live.web_st=ChRrdWFpc2hvdS5saXZlLndlYi5zdBKgAZIqgEWU0YHOwebl3XEBuu7odxIyvSspUq1v_kvLGCVI_eURJXwii1_AN0CwuxScDBKoK2y2HIghAZLxMVzJhR0wQ4IHJ0zi3TtLsnrbNlA9VnnSZpuHK2b1M2RxEiFEKBRTK0AwnRzN7UOvkKQTfOIxtALOFXqSPOzJ4R8ScI30o_CqmgWPbg7dw0Kt5sgpGFYcZMCA1vgkFhiph-O4cE8aEjGueioax06vmORaF3eBQr3cQSIgsBvfCInpIGIqrTgvvo3648essPww4pkcAKNHOgeNHjUoBTAB; kuaishou.live.web_ph=d77227e666b43027931968af443b14bd7b28'
  33. else:
  34. cookie, times = res[0]
  35. print('获取cookie ' + str(int(times)) + ' ' + cookie[0:30])
  36. headers = {
  37. 'user-agent': user_agents.pop(),
  38. 'Cookie': cookie,
  39. 'Referer': 'https://live.kuaishou.com/u/3xse' + live_stream_id
  40. }
  41. proxy = Proxy.get()
  42. print('获取代理 ' + proxy)
  43. proxies = {
  44. "http": "http://" + proxy,
  45. "https": "http://" + proxy
  46. }
  47. try:
  48. # https://live.kuaishou.com/live_graphql
  49. r = requests.post(
  50. "https://live.kuaishou.com/m_graphql",
  51. json=param_json,
  52. headers=headers,
  53. proxies=proxies,
  54. timeout=30
  55. )
  56. webSocketInfo = r.json()['data']['webSocketInfo']
  57. token = webSocketInfo.get('token')
  58. if token is None:
  59. redis_connection.zincrby(scrape_cookie_key, 1, cookie)
  60. Proxy.del_proxy(proxy)
  61. print('获取Token失败' + Proxy.proxy_info)
  62. # 重试10次还失败
  63. if retry > 10:
  64. return "wss://live-ws-pg.kuaishou.com/websocket", "lWQRDokMCJn2+oZyzA7dMovsp8AviGc/8NaWgfY0A4Jtp8trEaxG4Fy/49rkUtiERneEtNXYSofggh+PZUzpFHbUMCjNi/j/OxpfectTdDWWaO2VgGZbX8MBpcuQolEiGqS5fKwTqvbjV32an6gfNeIopmUI7HxwLQuobGCQQsM="
  65. time.sleep(2)
  66. retry = retry + 1
  67. return get_websocket_info(live_stream_id, retry)
  68. url = webSocketInfo['webSocketUrls'][0]
  69. return url, token
  70. except Exception as e:
  71. print('获取Token抛出异常:' + str(e) + '\n' + Proxy.proxy_info)
  72. return get_websocket_info(live_stream_id, retry)
  73. app = Flask(__name__)
  74. requests.packages.urllib3.disable_warnings()
  75. redis_connection = redis.Redis(
  76. host='r-2ze28bdb7389a8a4.redis.rds.aliyuncs.com',
  77. port=6379,
  78. password='Zhuaduoduo2017',
  79. decode_responses=True
  80. )
  81. # cookie
  82. scrape_cookie_key = 'barrageScraperCookieSortSet'
  83. @app.route('/getToken', methods=['GET'])
  84. def getToken():
  85. live_stream_id = request.args.get('live_stream_id')
  86. if live_stream_id is not None:
  87. url, token = get_websocket_info(live_stream_id)
  88. return json.dumps({
  89. 'url': url,
  90. 'token': token,
  91. })
  92. return ''
  93. # def run_server(port):
  94. # app.run(host='0.0.0.0', port=port)
  95. if __name__ == "__main__":
  96. port = int(sys.argv[1])
  97. app.run(host='0.0.0.0', port=port)