店播爬取Python脚本

barrage.py 13KB


#!/usr/bin/python3
#coding=utf-8
#-*- coding: UTF-8 -*-
  4. class MessageDecode:
  5. def __init__(self, buf):
  6. self.buf = buf
  7. self.pos = 0
  8. self.message = {}
  9. def __len__(self):
  10. return len(self.buf)
  11. def int_(self):
  12. res = 0
  13. i = 0
  14. while self.buf[self.pos] > 128:
  15. res = res | (127 & self.buf[self.pos]) << 7 * i
  16. self.pos += 1
  17. i += 1
  18. res = res | self.buf[self.pos] << 7 * i
  19. self.pos += 1
  20. return res
  21. @staticmethod
  22. def hex_(n: int) -> list:
  23. res = []
  24. while n > 128:
  25. res.append((n & 127) | 128)
  26. n = n >> 7
  27. res.append(n)
  28. return res
  29. def bytes(self):
  30. e = self.int_()
  31. if e + self.pos > len(self.buf):
  32. raise Exception('长度不匹配')
  33. res = self.buf[self.pos:e + self.pos]
  34. self.pos += e
  35. return res
  36. def skip(self, e=0):
  37. """跳过多少字节"""
  38. if e == 0:
  39. while 128 & self.buf[self.pos] :
  40. self.pos += 1
  41. if self.pos >= len(self.buf):
  42. return
  43. self.pos += 1
  44. else:
  45. self.pos += e
  46. def skipType(self, e):
  47. if e == 0:
  48. self.skip()
  49. elif e == 1:
  50. self.skip(8)
  51. elif e == 2:
  52. self.skip(self.int_())
  53. elif e == 3:
  54. while True:
  55. e = 7 & self.int_()
  56. if 4 != e:
  57. self.skipType(e)
  58. else:
  59. break
  60. elif e == 5:
  61. self.skip(4)
  62. else:
  63. raise Exception('跳过类型错误')
  64. def decode(self):
  65. """只处理弹幕"""
  66. length = len(self)
  67. while self.pos < length:
  68. t = self.int_()
  69. tt = t >> 3
  70. if tt == 1:
  71. self.message['payloadType'] = self.int_()
  72. if self.message['payloadType'] == 310: # 弹幕
  73. continue
  74. elif self.message['payloadType'] == 381: # PK
  75. continue
  76. return False
  77. elif tt == 2:
  78. self.message['compressionType'] = self.int_()
  79. elif tt == 3:
  80. self.message['payload'] = self.bytes()
  81. else:
  82. self.skipType(t & 7)
  83. return True
  84. def string(self):
  85. e = self.bytes()
  86. n = len(e)
  87. if n < 1:
  88. return ""
  89. s = []
  90. t = 0
  91. while t < n:
  92. r = e[t]
  93. t += 1
  94. if r < 128:
  95. s.append(r)
  96. elif 191 < r < 224:
  97. s.append((31 & r) << 6 | 63 & e[t])
  98. t += 1
  99. elif 239 < r < 365:
  100. x = (7 & r) << 18 | (63 & e[t]) << 12
  101. t += 1
  102. y = (63 & e[t]) << 6
  103. t += 1
  104. z = 63 & e[t]
  105. t += 1
  106. r = (x | y | z) - 65536
  107. s.append(55296 + (r >> 10))
  108. s.append(56320 + (1023 & r))
  109. else:
  110. x = (15 & r) << 12
  111. y = (63 & e[t]) << 6
  112. t += 1
  113. z = 63 & e[t]
  114. t += 1
  115. s.append(x | y | z)
  116. string = ''
  117. for w in s:
  118. string += chr(w) # python3
  119. return string
  120. def user_info_decode(self, r, l):
  121. c = self.pos + l
  122. m = {}
  123. while self.pos < c:
  124. t = self.int_()
  125. tt = t >> 3
  126. if tt == 1:
  127. m['principalId'] = self.string()
  128. elif tt == 2:
  129. m['userName'] = self.string()
  130. elif tt == 3:
  131. m['headUrl'] = self.string()
  132. else:
  133. self.skipType(t & 7)
  134. return m
  135. def comment_decode(self, r, l):
  136. c = self.pos + l
  137. m = {}
  138. while self.pos < c:
  139. t = self.int_()
  140. tt = t >> 3
  141. if tt == 1:
  142. m['id'] = self.string()
  143. elif tt == 2:
  144. m['user'] = self.user_info_decode(self.buf, self.int_())
  145. elif tt == 3:
  146. m['content'] = self.string()
  147. elif tt == 4:
  148. m['deviceHash'] = self.string()
  149. elif tt == 5:
  150. m['sortRank'] = self.int_()
  151. elif tt == 6:
  152. m['color'] = self.string()
  153. else:
  154. self.skipType(t & 7)
  155. return m
  156. def gift_decode(self, r, l):
  157. c = self.pos + l
  158. m = {}
  159. while self.pos < c:
  160. t = self.int_()
  161. tt = t >> 3
  162. if tt == 1:
  163. m['id'] = self.string()
  164. elif tt == 2:
  165. m['user'] = self.user_info_decode(self.buf, self.int_())
  166. elif tt == 3:
  167. m['time'] = self.int_()
  168. elif tt == 4:
  169. m['giftId'] = self.int_()
  170. elif tt == 5:
  171. m['sortRank'] = self.int_()
  172. elif tt == 6:
  173. m['mergeKey'] = self.string()
  174. elif tt == 7:
  175. m['batchSize'] = self.int_()
  176. elif tt == 8:
  177. m['comboCount'] = self.int_()
  178. elif tt == 9:
  179. m['rank'] = self.int_()
  180. elif tt == 10:
  181. m['expireDuration'] = self.int_()
  182. elif tt == 11:
  183. m['clientTimestamp'] = self.int_()
  184. elif tt == 12:
  185. m['slotDisplayDuration'] = self.int_()
  186. elif tt == 13:
  187. m['starLevel'] = self.int_()
  188. elif tt == 14:
  189. m['styleType'] = self.int_()
  190. elif tt == 15:
  191. m['liveAssistantType'] = self.int_()
  192. elif tt == 16:
  193. m['deviceHash'] = self.string()
  194. elif tt == 17:
  195. m['danmakuDisplay'] = self.bool()
  196. else:
  197. self.skipType(t & 7)
  198. return m
  199. def like_decode(self, r, l):
  200. c = self.pos + l
  201. m = {}
  202. while self.pos < c:
  203. t = self.int_()
  204. tt = t >> 3
  205. if tt == 1:
  206. m['id'] = self.string()
  207. elif tt == 2:
  208. m['user'] = self.user_info_decode(self.buf, self.int_())
  209. elif tt == 3:
  210. m['sortRank'] = self.int_()
  211. elif tt == 4:
  212. m['deviceHash'] = self.string()
  213. else:
  214. self.skipType(t & 7)
  215. return m
  216. def combo_comment_decode(self, r, l):
  217. c = self.pos + l
  218. m = {}
  219. while self.pos < c:
  220. t = self.int_()
  221. tt = t >> 3
  222. if tt == 1:
  223. m['id'] = self.string()
  224. elif tt == 2:
  225. m['content'] = self.string()
  226. elif tt == 3:
  227. m['comboCount'] = self.int_()
  228. else:
  229. self.skipType(t & 7)
  230. return m
  231. def pk_player_statistic_decode(self, r, l):
  232. c = self.pos + l
  233. m = {}
  234. while self.pos < c:
  235. t = self.int_()
  236. tt = t >> 3
  237. if tt == 1:
  238. m['user_info'] = self.user_info_decode(self.buf, self.int_())
  239. else:
  240. self.skipType(t & 7)
  241. return m
  242. def feed_decode(self):
  243. self.pos = 0
  244. self.buf = self.message['payload']
  245. length = len(self.buf)
  246. while self.pos < length:
  247. t = self.int_()
  248. tt = t >> 3
  249. # print(tt)
  250. if tt == 1:
  251. self.message['displayWatchingCount'] = self.string()
  252. # print("观看人数:" + self.message['displayWatchingCount'])
  253. elif tt == 2:
  254. self.message['displayLikeCount'] = self.string()
  255. # print("点赞数:" + self.message['displayLikeCount'])
  256. elif tt == 3:
  257. self.message['pendingLikeCount'] = self.int_()
  258. elif tt == 5:
  259. if not self.message.get('user'):
  260. self.message['user'] = []
  261. self.message['user'].append(self.comment_decode(self.buf, self.int_()))
  262. elif tt == 6:
  263. self.message['commentCursor'] = self.string()
  264. elif tt == 7:
  265. if not self.message.get('comboComment'):
  266. self.message['comboComment'] = []
  267. self.message['comboComment'].append(self.combo_comment_decode(self.buf, self.int_()))
  268. elif tt == 8: # 点赞
  269. if not self.message.get('like'):
  270. self.message['like'] = []
  271. self.message['like'].append(self.like_decode(self.buf, self.int_()))
  272. elif tt == 9: # 礼物
  273. if not self.message.get('gift'):
  274. self.message['gift'] = []
  275. self.message['gift'].append(self.gift_decode(self.buf, self.int_()))
  276. def pk_stat_decode(self):
  277. self.pos = 0
  278. self.buf = self.message['payload']
  279. length = len(self.buf)
  280. while self.pos < length:
  281. t = self.int_()
  282. tt = t >> 3
  283. if tt == 1:
  284. self.message['pkId'] = self.string()
  285. if tt == 2:
  286. self.message['time'] = self.int_()
  287. if tt == 3:
  288. self.message['startTime'] = self.int_()
  289. if tt == 4:
  290. self.message['voteDeadline'] = self.int_()
  291. if tt == 5:
  292. # PkPlayerStatistic
  293. if not self.message.get('pkPlayerStatistic'):
  294. self.message['pkPlayerStatistic'] = []
  295. self.message['pkPlayerStatistic'].append(self.pk_player_statistic_decode(self.buf, self.int_()))
  296. if tt == 6:
  297. # voteEnd
  298. self.message['voteEnd'] = self.int_()
  299. if tt == 7:
  300. # voteEndWaitDeadline
  301. self.message['voteEndWaitDeadline'] = self.int_()
  302. if tt == 8:
  303. # prePenaltyDeadline
  304. self.message['prePenaltyDeadline'] = self.int_()
  305. if tt == 9:
  306. # penaltyDeadline
  307. self.message['penaltyDeadline'] = self.int_()
  308. if tt == 10:
  309. # showType
  310. self.message['showType'] = self.string()
  311. if tt == 11:
  312. # mvpUserId
  313. self.message['mvpUserId'] = self.int_()
  314. if tt == 12:
  315. # mvpShowDialogDeadline
  316. self.message['mvpShowDialogDeadline'] = self.int_()
  317. if __name__ == '__main__':
  318. m = {"0": 8, "1": 182, "2": 2, "3": 16, "4": 1, "5": 26, "6": 162, "7": 1, "8": 10, "9": 4, "10": 50, "11": 48,
  319. "12": 48, "13": 43, "14": 18, "15": 4, "16": 49, "17": 54, "18": 55, "19": 50, "20": 74, "21": 76, "22": 18,
  320. "23": 30, "24": 10, "25": 11, "26": 115, "27": 49, "28": 48, "29": 49, "30": 54, "31": 57, "32": 55, "33": 56,
  321. "34": 48, "35": 51, "36": 48, "37": 18, "38": 15, "39": 230, "40": 136, "41": 145, "42": 239, "43": 188,
  322. "44": 140, "45": 232, "46": 142, "47": 177, "48": 239, "49": 188, "50": 140, "51": 228, "52": 185, "53": 159,
  323. "54": 32, "55": 164, "56": 1, "57": 50, "58": 15, "59": 49, "60": 52, "61": 49, "62": 53, "63": 48, "64": 55,
  324. "65": 54, "66": 54, "67": 54, "68": 45, "69": 54, "70": 48, "71": 48, "72": 55, "73": 50, "74": 56, "75": 1,
  325. "76": 64, "77": 1, "78": 72, "79": 1, "80": 80, "81": 224, "82": 167, "83": 18, "84": 96, "85": 232, "86": 7,
  326. "87": 130, "88": 1, "89": 8, "90": 68, "91": 82, "92": 111, "93": 67, "94": 70, "95": 119, "96": 61, "97": 61,
  327. "98": 74, "99": 70, "100": 18, "101": 24, "102": 10, "103": 11, "104": 68, "105": 68, "106": 55, "107": 53,
  328. "108": 51, "109": 49, "110": 49, "111": 52, "112": 55, "113": 56, "114": 52, "115": 18, "116": 9, "117": 230,
  329. "118": 136, "119": 180, "120": 230, "121": 157, "122": 177, "123": 230, "124": 157, "125": 177, "126": 32,
  330. "127": 164, "128": 1, "129": 50, "130": 15, "131": 55, "132": 53, "133": 51, "134": 49, "135": 49, "136": 52,
  331. "137": 55, "138": 56, "139": 52, "140": 45, "141": 49, "142": 48, "143": 54, "144": 53, "145": 51, "146": 56,
  332. "147": 1, "148": 64, "149": 1, "150": 72, "151": 1, "152": 80, "153": 224, "154": 167, "155": 18, "156": 96,
  333. "157": 232, "158": 7, "159": 130, "160": 1, "161": 8, "162": 56, "163": 47, "164": 106, "165": 43, "166": 89,
  334. "167": 103, "168": 61, "169": 61, "170": 32, "171": 240, "172": 218, "173": 176, "174": 165, "175": 207,
  335. "176": 45}
  336. message = MessageDecode(list(m.values()))
  337. if message.decode():
  338. message.feed_decode()
  339. if message.message.get('gift'):
  340. print('收到礼物:')
  341. print(message.message.get('gift'))
  342. elif message.message.get('user'):
  343. print('收到弹幕:')
  344. print(message.message.get('user'))