店播爬取Python脚本

dy_barrage_info.py 5.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. import requests
  2. import time
  3. from log.print_log import PrintLog
  4. from libs.Xg04 import X_Gorgon
  5. from libs.douyin_pb2 import DouYin
  6. from libs.douyin_user_pb2 import DouYinUser
  7. from libs.proxy import Proxy
  8. from libs.en0414 import DouYinApi
  class DyBarrageInfo:
      """Poll a Douyin live room for chat ("barrage") messages.

      ``get_data`` is the only public entry point; everything prefixed with an
      underscore is an internal helper.
      """

      # Upper bound on POST attempts per call. The original counter loop also
      # allowed 11 passes (counter values 0..10).
      MAX_ATTEMPTS = 11

      # Seconds allowed for connecting/reading on each attempt. ``requests``
      # applies no timeout by default, so without this a stalled proxy would
      # block the caller indefinitely and ConnectTimeout could hardly occur.
      REQUEST_TIMEOUT = 10

      _URL_PREFIX = 'http://webcast3-normal-c-lf.amemv.com/webcast/room/'
      _URL_SUFFIX = '/_fetch_message_polling/?'

      # Form body sent with every poll; it is also an input of the X-Gorgon
      # signature, so it must stay byte-identical to what is posted.
      _BODY = '&'.join((
          'live_id=1',
          'parse_cnt=0',
          'recv_cnt=0',
          'cursor=0',
          'last_rtt=0',
          'identity=audience',
          'resp_content_type=protobuf',
          'user_id=3117857528618044',
      ))

      @staticmethod
      def _proxies_for(proxy):
          """Return the ``requests`` proxy mapping that routes through *proxy*."""
          return {
              "http": "http://" + proxy,
              "https": "http://" + proxy,
          }

      @staticmethod
      def _build_query(iid, device_id):
          """Return the URL query string for the given install and device ids.

          The parameter order is kept exactly as before because the string is
          also fed to ``X_Gorgon``.
          """
          pairs = (
              ('os_api', '23'),
              ('device_type', 'HSF-FL00'),
              ('ssmix', 'a'),
              ('manifest_version_code', '7301'),
              ('dpi', '480'),
              ('app_name', 'aweme'),
              ('version_name', '7.3.0'),
              ('app_type', 'normal'),
              ('ac', 'wifi'),
              ('host_abi', 'armeabi'),
              ('update_version_code', '7302'),
              ('channel', 'douyin_tengxun_wzl'),
              ('_rticket', '1629772802255'),
              ('device_platform', 'android'),
              ('iid', iid),
              ('version_code', '730'),
              ('device_id', device_id),
              ('resolution', '1080*2340'),
              ('os_version', '6.0.1'),
              ('language', 'zh'),
              ('device_brand', 'HUAWEI'),
              ('mcc_mnc', '46000'),
              ('ts', str(int(time.time()))),
              ('aid', '1128'),
          )
          return '&'.join('='.join(pair) for pair in pairs)

      @staticmethod
      def _timestamp():
          """Return the current local time formatted for log lines."""
          return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

      @staticmethod
      def _log_exception(label, error, room_id):
          """Log a request exception together with the room id and proxy info."""
          PrintLog.print(
              DyBarrageInfo._timestamp() + label + str(error) + '\n'
              + room_id + '\n'
              + Proxy.proxy_info
          )

      @staticmethod
      def _collect_chat_messages(content, into):
          """Decode a protobuf response and add its chat messages to *into*.

          Entries are keyed by the message's position inside the response.
          *into* is filled in place so that entries decoded before a parsing
          error are kept, as they were before.
          """
          envelope = DouYin()
          envelope.ParseFromString(content)
          for index, message in enumerate(envelope.messages):
              if message.method != 'WebcastChatMessage':
                  continue
              chat = DouYinUser()
              chat.ParseFromString(message.payload)
              sender = chat.user
              into[index] = {
                  "message_id": message.messageId,
                  "content": chat.content,
                  "uid": sender.uid,
                  "gender": sender.gender,
                  "nickname": sender.nickname,
                  "short_id": sender.short_id,
                  "publisher_dy_id": sender.dy_id,
                  "publisher_sec_uid": sender.sec_uid,
              }

      @staticmethod
      def get_data(room_id):
          """Fetch the chat messages currently available for a live room.

          A device is registered through ``DouYinApi``, then the message
          polling endpoint is POSTed up to ``MAX_ATTEMPTS`` times. Polling
          stops as soon as at least one chat message has been collected.

          Args:
              room_id: Live room identifier; converted with ``str()``.

          Returns:
              ``{'room_id': <str>, 'data': {<index>: {...}}}`` where each value
              holds ``message_id``, ``content``, ``uid``, ``gender``,
              ``nickname``, ``short_id``, ``publisher_dy_id`` and
              ``publisher_sec_uid``; or ``False`` when no chat message could be
              collected.
          """
          room_id = str(room_id)
          proxy = Proxy.get()
          proxies = DyBarrageInfo._proxies_for(proxy)

          dou_api = DouYinApi('', proxies)
          device = dou_api.register_device()
          device_id = device['device_id']
          iid = device['iid']
          udid = device['uuid']
          openudid = device['openudid']
          cookie = device['cookie']
          dou_api.init_device_ids(device_id, iid, udid, openudid)

          query_param = DyBarrageInfo._build_query(iid, device_id)
          body = DyBarrageInfo._BODY
          url = DyBarrageInfo._URL_PREFIX + room_id + DyBarrageInfo._URL_SUFFIX + query_param

          x_gorgon = X_Gorgon(query_param, body)
          headers = {
              'Cookie': cookie,
              'user-agent': 'okhttp/3.10.0.1',
              "X-Gorgon": x_gorgon.get('X-Gorgon'),
              "X-Khronos": x_gorgon.get('X-Khronos'),
              "Connection": 'close',
              "X-SS-REQ-TICKET": str(int(time.time() * 1000)),
              "x-ss-stub": '6AC103439C12D2FED188040DFB6D7799',
              "Content-Type": 'application/x-www-form-urlencoded',
          }

          response_json = {'room_id': room_id, 'data': {}}
          for _ in range(DyBarrageInfo.MAX_ATTEMPTS):
              try:
                  # verify=False is kept from the original call; the URL is plain
                  # http, so certificate checking does not come into play here.
                  response = requests.post(
                      url,
                      data=body,
                      headers=headers,
                      verify=False,
                      proxies=proxies,
                      timeout=DyBarrageInfo.REQUEST_TIMEOUT,
                  )
                  # ``content`` is bytes, so the old ``!= ''`` test was always
                  # true; truthiness rejects both None and an empty body.
                  if response.status_code == 200 and response.content:
                      DyBarrageInfo._collect_chat_messages(
                          response.content, response_json['data'])
                      if response_json['data']:
                          # The request is identical on every pass (cursor=0),
                          # so repeating it after a hit only re-downloads and
                          # overwrites what was already collected.
                          break
                  else:
                      PrintLog.print(
                          DyBarrageInfo._timestamp() + '爬取http连接失败!'
                          + str(response.status_code) + '\n'
                          + Proxy.proxy_info + '\n'
                          + room_id + '\n'
                      )
              except requests.exceptions.ProxyError as e:
                  DyBarrageInfo._log_exception('代理过期!', e, room_id)
                  Proxy.del_proxy(proxy)
                  # Retrying through the proxy that was just deleted is futile.
                  # NOTE(review): assumes Proxy.get() hands out a different proxy
                  # once the failed one has been deleted - confirm in libs.proxy.
                  proxy = Proxy.get()
                  proxies = DyBarrageInfo._proxies_for(proxy)
              except requests.exceptions.ConnectTimeout as e:
                  DyBarrageInfo._log_exception(' ConnectTimeout!', e, room_id)
                  Proxy.del_proxy(proxy)
                  # Same rotation as for ProxyError (see the note above).
                  proxy = Proxy.get()
                  proxies = DyBarrageInfo._proxies_for(proxy)
              except Exception as e:
                  # Best-effort scraping: any other failure (including protobuf
                  # decoding errors and read timeouts) is logged and retried.
                  DyBarrageInfo._log_exception('请求抛出异常!', e, room_id)

          if not response_json['data']:
              return False
          return response_json