店播爬取Python脚本

en0414.py 40KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143
  1. import requests
  2. import uuid
  3. import random
  4. import time
  5. import json
  6. import hashlib
  7. from libs.aesgzip import tt_encrypt
  8. from urllib import parse
  9. from libs.proxy import Proxy
  10. from rds_model.db_redis import DbRedis
  def get_mc():
      """Return a random MAC-style string: six two-character groups joined by ':'.

      Each character is drawn independently from ``1234567890ABCDEF``.
      """
      hex_digits = "1234567890ABCDEF"
      groups = []
      for _ in range(6):
          # Two independent draws per group, in the same order as before.
          pair = [random.choice(hex_digits) for _ in range(2)]
          groups.append(''.join(pair))
      return ':'.join(groups)
  def get_random(i, random_type=1):
      """Return a random string of ``i`` characters.

      :param i: number of characters to produce
      :param random_type: ``1`` yields an ``i``-digit decimal number (no leading
          zero); ``8`` yields upper-case ASCII letters; any other value yields
          characters from ``1234567890abcde``
      :return: the generated string
      """
      if random_type == 1:
          lowest = 1 * 10 ** (i - 1)
          highest = 1 * 10 ** i - 1
          return str(random.randint(lowest, highest))

      if random_type == 8:
          alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      else:
          alphabet = "1234567890abcde"
      return ''.join(random.choice(alphabet) for _ in range(i))
  def get_random_brand_type():
      """Return a random model code shaped like ``ABC-DE00``.

      Three random upper-case letters, a dash, two random upper-case letters and
      the literal suffix ``00``.
      """
      prefix = get_random(3, random_type=8)
      suffix = get_random(2, random_type=8)
      return prefix + '-' + suffix + '00'
  # Random app version components, fixed once per process at import time.
  V1 = str(random.randint(5, 8))
  V2 = str(random.randint(0, 9))

  # Distribution channel reported to the server.
  channel = 'oppo'

  # Known model codes; not referenced by the code visible in this file, where
  # the model is generated randomly instead.
  device_type_list = [
      'VTR-AL00', 'KNT-AL10', 'PRA-AL00', 'DUA-TL00', 'PLK-AL10', 'KIW-AL10',
      'LON-AL00', 'ANA-AN00', 'JSC-AN00', 'TAS-AN00', 'TAS-AL00',
  ]
  device_type = get_random_brand_type()
  device_brand = 'HUAWEI'

  # Module-level cookie string read by the request-signing code.
  cookie = ''
  58. class DouYinApi:
  59. #USER_AGENT = 'okhttp/3.10.0.1'
  60. USER_AGENT = f'com.ss.android.ugc.aweme/{V1}{V2}0 (Linux; U; Android 5.1.1; zh_CN; {device_type}; Build/LMY47V; Cronet/58.0.2991.0)'
  61. COMMON_DEVICE_PARAMS = {
  62. 'retry_type': 'no_retry',
  63. 'ac': '4g',
  64. 'channel': channel,
  65. 'aid': '2329',
  66. 'app_name': 'aweme',
  67. 'version_code': f'{V1}{V2}0',
  68. 'version_name': f'{V1}.{V2}.0',
  69. 'device_platform': 'android',
  70. 'ssmix': 'a',
  71. 'device_type': device_type,
  72. 'device_brand': device_brand,
  73. 'language': 'zh',
  74. 'os_api': '25',
  75. 'os_version': '7.1.2',
  76. 'manifest_version_code': f'{V1}{V2}0',
  77. 'resolution': '720*1280',
  78. 'dpi': '192',
  79. 'update_version_code': f'{V1}{V2}02',
  80. 'mcc_mnc': '46000'
  81. }
  82. PROXY = {}
  83. def __init__(self, sessionid, proxies):
  84. """
  85. :param cid: client id
  86. """
  87. self.proxies = proxies
  88. self.__cid = ''
  89. self.__device_id = ''
  90. self.__iid = ''
  91. self.__uuid = ''
  92. self.__openudid = ''
  93. self.__device_params = {}
  94. self.__cookie = {
  95. }
  96. def init_device_ids(self, device_id, iid, udid, openudid, cc=None):
  97. """初始化设备id参数
  98. :param device_id: device id
  99. :param iid: install id
  100. :param udid: imei
  101. :param openudid: open udid
  102. :param serial_number: serial no
  103. :param clientudid: client udid
  104. :param sim_serial_number: sim serial number
  105. :param mc: mac address
  106. :return: none
  107. """
  108. self.__device_id = device_id
  109. self.__iid = iid
  110. self.__uuid = udid
  111. self.__openudid = openudid
  112. device_ids = {
  113. 'uuid': udid,
  114. 'openudid': openudid
  115. }
  116. if device_id and iid:
  117. device_ids.update({
  118. 'device_id': device_id,
  119. 'iid': iid,
  120. })
  121. self.__device_params = self.COMMON_DEVICE_PARAMS.copy()
  122. self.__device_params.update(device_ids)
  123. if cc:
  124. self.__cookie.update(cc)
  # Build the "ss_app_log" device-registration document and return it passed
  # through tt_encrypt() after JSON serialisation.
  # Version, channel, OS, model, brand and dpi fields are read from the
  # class-level COMMON_DEVICE_PARAMS; the identifiers come from the arguments.
  # NOTE(review): indentation was lost in this copy of the file, so it cannot
  # be told from here whether the 'push_sdk' assignment near the end belongs
  # inside the `if iid:` branch or runs unconditionally - confirm against the
  # original source before restructuring this method.
  125. def __get_encrypted_device_info(self, device_id, openudid, udid, clientudid, serial_number, mac, iid):
  126. register_info = {
  127. "magic_tag": "ss_app_log",
  128. "header": {
  129. "display_name": "抖音短视频",
  # The three *_version_code values are stored as strings in
  # COMMON_DEVICE_PARAMS and converted to integers here.
  130. "update_version_code": int(self.COMMON_DEVICE_PARAMS['update_version_code']),
  131. "manifest_version_code": int(self.COMMON_DEVICE_PARAMS['manifest_version_code']),
  132. "aid": '2329',
  133. "channel": self.COMMON_DEVICE_PARAMS['channel'],
  134. "appkey": "57bfa27c67e58e7d920028d3",
  135. "package": "com.ss.android.ugc.aweme",
  136. "app_version": self.COMMON_DEVICE_PARAMS['version_name'],
  137. "version_code": int(self.COMMON_DEVICE_PARAMS['version_code']),
  138. "sdk_version": "2.5.5.8",
  139. "os": "Android",
  140. "os_version": self.COMMON_DEVICE_PARAMS['os_version'],
  141. "os_api": self.COMMON_DEVICE_PARAMS['os_api'],
  142. "device_model": self.COMMON_DEVICE_PARAMS['device_type'],
  143. "device_brand": self.COMMON_DEVICE_PARAMS['device_brand'],
  # The manufacturer is reported with the same value as the brand.
  144. "device_manufacturer": self.COMMON_DEVICE_PARAMS['device_brand'],
  145. "cpu_abi": "armeabi-v7a",
  146. "build_serial": serial_number,
  147. "release_build": "fbfabbc_20190522",
  148. "density_dpi": self.COMMON_DEVICE_PARAMS['dpi'],
  149. "display_density": "mdpi",
  # Hard-coded here as "1280x720"; COMMON_DEVICE_PARAMS['resolution'] holds
  # '720*1280' and is not used for this field.
  150. "resolution": "1280x720",
  151. "language": "zh",
  152. "mc": mac,
  153. "timezone": 8,
  154. "access": "4G",
  155. "not_request_sender": 0,
  156. "rom": "EMUI-rel.se.infra.20191210.155925",
  157. "rom_version": "HUAWEI-user 7.1.2 20171130.276299 release-keys",
  158. "sig_hash": "aea615ab910015038f73c47e45d21466",
  # device_id is left out of the literal and added below only when truthy.
  159. # "device_id": str(device_id),
  160. "openudid": str(openudid),
  161. "udid": str(udid),
  162. "clientudid": str(clientudid),
  163. "serial_number": str(serial_number),
  164. "sim_serial_number": [
  165. ],
  166. "region": "CN",
  167. "tz_name": "Asia/Shanghai",
  168. "tz_offset": 28800
  169. },
  # Generation timestamp in milliseconds, serialised as a string.
  170. "_gen_time": str(round(time.time() * 1000))
  171. }
  # device_id / iid are optional: register_device() passes None for both.
  172. if device_id:
  173. register_info['header']['device_id'] = str(device_id)
  174. if iid:
  175. register_info['header']['iid'] = str(iid)
  # push_sdk is a string that looks like a list, not an actual list.
  176. register_info['header']['push_sdk'] = '[1, 2, 6, 7, 8, 9]'
  177. return tt_encrypt((json.dumps(register_info)))
  def log_settings(self, ids):
      """POST the encrypted device profile to the ``log_settings`` endpoint.

      Previously the request was a POST only when proxies were configured; the
      direct path issued a GET carrying the same body. Both paths now POST.

      :param ids: mapping with the keys ``device_id``, ``openudid``, ``uuid``,
          ``clientudid``, ``serial_number``, ``iid`` and ``mc`` (the shape
          returned by ``register_device``)
      :return: None; the HTTP response is not inspected
      :raises KeyError: if ``ids`` lacks one of the required keys
      """
      payload = self.__get_encrypted_device_info(
          ids['device_id'],
          ids['openudid'],
          ids['uuid'],
          ids['clientudid'],
          ids['serial_number'],
          ids['mc'],
          ids['iid'],
      )
      url = self.__add_other_params('https://log.snssdk.com/service/2/log_settings/?', {})
      sign = self.__get_sign(url)
      # proxies=None is the requests default, so one call serves both the
      # proxied and the direct case.
      requests.post(
          url,
          headers=self.__get_headers(sign),
          cookies=self.__cookie,
          proxies=self.proxies or None,
          data=payload,
          verify=False,
          timeout=20,
      )
  # Build an "xlog" device-environment report for the given identifiers and
  # hand it, together with the target xlog URL, to a relay service on the
  # local network. Returns None; the relay's response text is read into `r`
  # and then discarded.
  # Most of the report is a fixed fingerprint; only p1/p2, ait, the hw
  # brand/model fields and the id.imei/imsi/adid/serial fields vary per call.
  196. def xlog(self, device_id, iid, udid, openudid, serial_number):
  197. xlog = {
  198. "dpod": {
  199. "pod": ""
  200. },
  201. "p1": f"{device_id}",
  202. "p2": f"{iid}",
  203. "ut": 72,
  # "ait" is the current epoch time minus 72 seconds, matching "ut" above.
  204. "ait": int(time.time() - 72),
  205. "pkg": "com.ss.android.ugc.aweme",
  # NOTE(review): this fingerprint and id.r below say Android 5.1.1, while
  # COMMON_DEVICE_PARAMS reports os_version 7.1.2 - confirm this is intended.
  206. "fp": "HUAWEI/HUAWEI/VOG-AL00:5.1.1/20171130.376229:user/release-keys",
  207. "vc": 0,
  208. "vpn": 0,
  209. "wifisid": "",
  210. "wifimac": "",
  211. "wifip": "",
  212. "aplist": [
  213. ],
  214. "route": {
  215. "iip": "0",
  216. "gip": "0",
  217. "ghw": "0",
  218. "type": "0"
  219. },
  220. "location": "",
  221. "apps": ["com.microvirt.guide", "com.android.browser", "com.ss.android.ugc.aweme"],
  222. "mdi_if": {
  223. },
  224. "extra": "install",
  225. "hw": {
  # brand/board reuse the configured brand; model/device/product reuse the
  # configured device type.
  226. "brand": self.COMMON_DEVICE_PARAMS['device_brand'],
  227. "model": self.COMMON_DEVICE_PARAMS['device_type'],
  228. "board": self.COMMON_DEVICE_PARAMS['device_brand'],
  229. "device": self.COMMON_DEVICE_PARAMS['device_type'],
  230. "product": self.COMMON_DEVICE_PARAMS['device_type'],
  231. "display": "720*1280",
  232. "dpi": 0,
  233. "bat": 1000,
  234. "cpu": {
  235. "core": 2,
  236. "hw": "placeholder",
  237. "max": "2400000",
  238. "min": "1000000",
  239. "ft": "neon vfp swp half thumb fastmult edsp vfpv3"
  240. },
  241. "mem": {
  242. "ram": "2120286208",
  243. "rom": "8320901120"
  244. }
  245. },
  246. "id": {
  247. "i": 22,
  248. "r": "5.1.1",
  249. "imei": f"{udid}",
  # IMSI is the fixed prefix '46000' followed by ten random decimal digits.
  250. "imsi": '46000' + str(self.__get_random(10)),
  251. "adid": f"{openudid}",
  252. "adid_ex": f"{openudid}",
  253. "mac": '',
  254. "serial": serial_number
  255. },
  256. "emulator": {
  257. },
  258. "env": {
  259. "ver": "0.6.02.17",
  260. "tag": "default",
  261. "pkg": "com.ss.android.ugc.aweme",
  262. "tz": "GMT+08:00",
  263. "ml": "zh_CN",
  264. "uid": 10045,
  265. "rebuild": 0,
  266. "jd": 0,
  267. "dbg": -1,
  268. "tid": 0,
  269. "xposed": 0,
  270. "hk": [
  271. "__system_property_get",
  272. "fopen",
  273. "fgets",
  274. "fread",
  275. "getpid",
  276. "gettid",
  277. "dlopen",
  278. "dlsym",
  279. "dladdr"],
  280. "su": 0,
  281. "sp": "/system/xbin/su",
  282. "ro.secure_s": "1",
  283. "ro.debuggable_s": "0",
  284. "click": "",
  285. "hph": "",
  286. "hpp": "",
  287. "mc": 0,
  288. "fc": 946933748,
  289. "jexp": 0,
  290. "frida": 0,
  291. "cydia": 0,
  292. "vapp": ""
  293. },
  294. "extension": {
  295. "bt": -1,
  296. "notify": 2212858197,
  297. "sign": "AEA615AB910015038F73C47E45D21466",
  # Opaque fixed blob; must be kept byte-for-byte.
  298. "bytes64": "Aa1IrwSQLRXfmUvJokUpTkHJ9Yh1G4lpVgZSOaZeri+QVgSy80D5ID5UbH6x6xcSHsdjXqljImqg/LxoSbAeyqYZ67ZUTE23KmeyzHjTOeTtGogSEQY2/D8UWmY7Xt815Ch96RiLEgmzqnzCGZ0/N2AAaS5FynrMMfNv9jPlZgcZBjlWv5Ept44wrLJ+G1rJ7CzjL567O2ClTXKGTbHDTZsh91lWXmcNKzsh9PReifo2kf9f2MvJnJ9mN80soe7hw+/96olR5tTH6yVpsbuJt0NjxXI+y5yKp+GLueKCeYbG4QIqqI4buMEsjnPQCOm1bZtYxqeevC1aJVlNMdoQSZjZ7xqsWk8cNBuXqe/XXQua4QIqqI4buMEsjnPQCOm1bWat8qmH4OhJGLtb/aaNzHlXbo7P3DIJUsaOqMvCZvAN5apd9qaCQuvbISV7KJveRYfCRQGc0wiPPEaBEF7iO6qDRjlTDdNocBDAc6VdJS27EXEPHFPIH40PlLnx7vTDN6kUtGcG6G07pnlwwdAp6VZJskS5EmTY0MWyrPBOzBug+bRJCza4pcoC5A8ALNOUc39lxIAX7NuMJcf5rmTmkg8asiKpeeQHm2lSK5YNJkldp4+JLVggF/F+Dr6JjhyhERFxDxxTyB+ND5S58e70wzcNUOIjqUHt95eTwSOIFJvTZ+ZxXM4kDOGCeZXOBoqAfUbYgYfGrRZtcMdw1Lra0HN713ei+bcfoILrHoJO4fS3ZEq1jSU2d8XDEIid38oMwYrPyzXQOAQ/eRZlCiMSW83LMCj/4N0KpL/qlOzmB0uiBqKuF73Qhd0IfQTPpVCrwO6P/upwe1Q8yj+kXQBntfARcQ8cU8gfjQ+UufHu9MM3QpsZRTCVwNPhHp7m8lPb3/6YAZdknQzHMOVZp/LM1kf9MvhPN3heC2rp/QwWY1p+IPPe4wuRVVSO2vyJ8PhyDQxltLZjJW23706uyu+wGLFBiE7+4WBKMHWO3JmR6TWuHglclf+zh1CT21Wy5n/3Ex732O0WkYD01c/U27BcQ2Gb5RZdXYfhXTJe6VIzQnf02PCccLK2HnTdZ6tZ4qquKhkB0gKbMhtx/V5sBYe38cgU+5ct4Zy3fRwT/i5HkB2Wh6UlY2Ty1vIfaakVcR9dC7TKHDAWLIO0CPgBDeZFdb1zfIGyz50R3D9FNBoXkK6o5MRSDrM+pK5mlxZPJ0n56WiqDTEjAaxwEDWLejQFgyczb8DTeJnn5+nvyP/5jmfdv9sb7e8tpPlu02HsOtNzRLZRdpI3mgFYLPF0AJz2DHf90TVVUkETnF3ts+CqH34sIKT0zmoYjvpKGzdznMgRkJMPB3/Q4n/SZqJYVx9Bae0yYyFhtXt6XCdLgBPreyCy57dyB/svbMR4tvQ6K6W28yo9zGrt6gp20h6UzBfquQkOfB1SPLAqdyCyEcbja+ew+LgOejtotPR6lPJ7gUDbIoBURxkdLpINVc0RirdDBYeDaH3sZPli2YneQthTQCW91O6bRzlAEbljd7NTi3OomYuf+aUu8Y5rTjnyJAig+jSORIsyBHYeyrnBuAbFZUsvC5yRvAfEf6Uf1KWgGjcplhfxnBb4ocrYARhO5NCkCRg66GqLCXUDUpm2Gh2mN17vPXAF2ZH+W/hUtmsTs5zkJwks+vnWRDGs9QZWzkUUd2A/eHRxxHqx51yqZKT6v6uuF716rYsthonl01CqAJe7q0jq//hm7ZwCwWyY+GxkxON9PRgo36h0MOgFSURMf/C8pOUz21p3kXrPxmoPxXrisOD5U3oCiOe01fWZoxtAHCzgQpWtnDwqPVxohTwXyShv",
  299. },
  300. "rl": {
  301. },
  302. "ssp": {
  303. },
  304. "grilock": "eyJvcyI6IkFuZHJvaWQiLCJ2ZXJzaW9uIjoiMS4wLjAiLCJ0b2tlbl9pZCI6IiIsImNvZGUiOjUwNH0="
  305. }
  306. s = (json.dumps(xlog))
  # NOTE(review): app_ver is hard-coded to 5.5.0 here, whereas the version sent
  # elsewhere is derived from the random V1/V2 values - confirm.
  307. url = f"https://xlog.snssdk.com/v2/r?os=0&ver=0.6.02.17&m=1&app_ver=5.5.0&region=CN&aid=2329&did={device_id}"
  308. data = {"post_d": s, "url": url}
  # The body sent to the relay is str(dict), i.e. a Python repr rather than
  # JSON; presumably the relay expects exactly that format - verify before
  # changing it. This call does not use self.proxies.
  309. r = requests.post("http://192.168.77.186:18400/xlogr", data=str(data), timeout=20).text
  310. # print(r)
  def register_device(self):
      """Register a new randomly generated device and return its identifiers.

      A random ``uuid``, ``openudid``, ``clientudid`` and MAC string are
      generated (the serial number is fixed), the encrypted registration
      document is POSTed to the ``device_register`` endpoint, and any cookies
      the server sets are merged into this client's cookie jar.

      :return: dict with the keys ``new_user``, ``device_id``, ``iid``,
          ``uuid``, ``openudid``, ``serial_number``, ``clientudid``, ``mc``
          and ``cookie`` (the cookie jar URL-encoded as a string)
      :raises KeyError: if the JSON response lacks ``new_user``,
          ``device_id`` or ``install_id``
      """
      udid = '8678' + get_random(11)
      serial_number = "71623411"
      openudid = 'c2a5' + str(uuid.uuid4())[-12:]
      clientudid = str(uuid.uuid4())
      mc = get_mc()

      params = {
          'uuid': udid,
          'openudid': openudid,
          '_rticket': str(int(round(time.time() * 1000)))
      }
      # Requests below are sent with verify=False; silence the resulting
      # urllib3 warnings.
      requests.packages.urllib3.disable_warnings()
      params.update(self.COMMON_DEVICE_PARAMS)
      device_register_url = 'https://log.snssdk.com/service/2/device_register/?' + parse.urlencode(params)
      headers = {
          'User-Agent': DouYinApi.USER_AGENT
      }
      # A new device has neither a device id nor an install id yet.
      d = self.__get_encrypted_device_info(None, openudid, udid, clientudid, serial_number, mc, iid=None)

      # proxies=None is the requests default, so one call serves both the
      # proxied and the direct case.
      resp = requests.post(device_register_url,
                           data=d, proxies=self.proxies or None,
                           headers=headers, verify=False, timeout=20)

      cookie = resp.cookies.get_dict()
      if cookie:
          self.__cookie.update(cookie)

      body = resp.json()
      return {
          'new_user': body['new_user'],
          'device_id': str(body['device_id']),
          'iid': str(body['install_id']),
          'uuid': udid,
          'openudid': openudid,
          'serial_number': serial_number,
          'clientudid': clientudid,
          'mc': mc,
          'cookie': parse.urlencode(self.__cookie)
      }
  def app_log(self, device_info=None):
      """Upload an ``app_log`` payload for the device set by ``init_device_ids``.

      The ``uuid`` and ``openudid`` query parameters are taken from the values
      stored by ``init_device_ids``. Cookies set by the server are merged into
      this client's cookie jar.

      :param device_info: payload to upload; it is passed to ``tt_encrypt`` and
          also forwarded to the local signing relay. Presumably a JSON string,
          as in ``register_device`` - confirm against the caller.
      :return: the decoded JSON response of the ``app_log`` endpoint
      :raises ValueError: if ``device_info`` is not supplied
      """
      # The previous body read the undefined names udid, openudid and
      # device_info and therefore always failed with NameError.
      if device_info is None:
          raise ValueError('device_info is required')

      params = {
          'uuid': self.__uuid,
          'openudid': self.__openudid,
          '_rticket': str(int(round(time.time() * 1000)))
      }
      params.update(self.COMMON_DEVICE_PARAMS)
      app_log_url = 'https://log.snssdk.com/service/2/app_log/?' + parse.urlencode(params)
      headers = {
          'User-Agent': DouYinApi.USER_AGENT
      }

      # NOTE(review): the relay's reply was assigned to `gorgon` but never
      # attached to the request headers; the call is kept as-is because its
      # intended use cannot be determined from this file.
      relay_request = {"device_info": device_info, 'url': app_log_url, 'ck': ''}
      requests.post("http://192.168.77.186:18400/app_log", data=str(relay_request), timeout=20).json()

      d = tt_encrypt(device_info)
      # proxies=None is the requests default, so one call serves both the
      # proxied and the direct case.
      resp = requests.post(app_log_url,
                           data=d, proxies=self.proxies or None,
                           headers=headers, verify=False, timeout=20)

      cookie = resp.cookies.get_dict()
      if cookie:
          self.__cookie.update(cookie)
      return resp.json()
  def get_feed(self):
      """Fetch the home-page recommendation feed.

      :return: whatever the internal GET helper returns for the feed URL
      """
      feed_endpoint = 'https://aweme-eagle.snssdk.com/aweme/v1/feed/?type=0&max_cursor=0&min_cursor=-1&count=6&volume=0.0&pull_type=2&need_relieve_aweme=0&filter_warn=0&req_from&is_cold_start=0'
      return self.__http_get(feed_endpoint)
  def get_nearby_feed(self, city_id):
      """Fetch the recommendation feed for one city.

      :param city_id: city code sent as the ``city`` parameter (the original
          author points to https://wenku.baidu.com/view/af4281bafd0a79563c1e7287.html
          for the list of codes)
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://api.amemv.com/aweme/v1/nearby/feed/?max_cursor=0&min_cursor=0&count=20&feed_style=1&filter_warn=0&poi_class_code=0'
      return self.__http_get(endpoint, dict(city=city_id))
  def get_user_info(self, user_id):
      """Fetch the profile of a user.

      :param user_id: user id sent as the ``user_id`` parameter
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme-eagle.snssdk.com/aweme/v1/user/?'
      query = dict(user_id=user_id)
      return self.__http_get(endpoint, query)
  def get_room_flv(self, room_id):
      """Query the webcast "reflow" info endpoint for a live room.

      :param room_id: room id sent as the ``room_id`` parameter; ``live_id``
          is always sent as ``'1'``
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://webcast-hl.amemv.com/webcast/room/reflow/info/?'
      query = dict(room_id=room_id, live_id='1')
      return self.__http_get(endpoint, query)
  def get_user_post(self, user_id, max_cursor, count):
      """Fetch one page of a user's posted videos.

      :param user_id: user id
      :param max_cursor: paging cursor; 0 for the first page, afterwards the
          ``max_cursor`` returned by the previous page
      :param count: number of videos to return
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v1/aweme/post/'
      query = dict(
          user_id=user_id,
          max_cursor=str(max_cursor),
          count=str(count),
      )
      return self.__http_get(endpoint, query)
  def comm_get(self, douyin_url, params):
      """Issue a GET through the internal helper for an arbitrary URL.

      :param douyin_url: target URL
      :param params: extra query parameters, forwarded unchanged
      :return: whatever the internal GET helper returns
      """
      result = self.__http_get(douyin_url, params)
      return result
  def get_sec_user_favorite(self, sec_user_id, max_cursor, count):
      """Fetch one page of the favorite list, addressed by ``sec_user_id``.

      :param sec_user_id: value sent as the ``sec_user_id`` parameter
      :param max_cursor: paging cursor; 0 for the first page, afterwards the
          ``max_cursor`` returned by the previous page
      :param count: number of items to return
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme-hl.snssdk.com/aweme/v1/aweme/favorite/?invalid_item_count=0&is_hiding_invalid_item=0&'
      query = dict(
          sec_user_id=sec_user_id,
          max_cursor=str(max_cursor),
          count=str(count),
      )
      return self.__http_get(endpoint, query)
  def get_user_favorite(self, user_id, max_cursor, count):
      """Fetch one page of the favorite list, addressed by ``user_id``.

      :param user_id: user id
      :param max_cursor: paging cursor; 0 for the first page, afterwards the
          ``max_cursor`` returned by the previous page
      :param count: number of items to return
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme-hl.snssdk.com/aweme/v1/aweme/favorite/?invalid_item_count=0&is_hiding_invalid_item=0&'
      query = dict(
          user_id=user_id,
          max_cursor=str(max_cursor),
          count=str(count),
      )
      return self.__http_get(endpoint, query)
  def get_user_forward_list(self, user_id, max_cursor, count):
      """Fetch one page of a user's forward (activity) list.

      :param user_id: user id
      :param max_cursor: paging cursor; 0 for the first page, afterwards the
          ``max_cursor`` returned by the previous page
      :param count: number of entries to return
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v1/forward/list/'
      query = dict(
          user_id=user_id,
          max_cursor=str(max_cursor),
          count=str(count),
      )
      return self.__http_get(endpoint, query)
  def get_user_following_list(self, user_id, max_time, count):
      """Fetch one page of the accounts a user follows.

      The original author notes that calling this too frequently makes the
      server stop returning data.

      :param user_id: user id
      :param max_time: paging value; 0 means "start from now" and is replaced
          by the current epoch second, otherwise pass the ``min_time``
          returned by the previous page
      :param count: number of entries to return
      :return: whatever the internal GET helper returns
      """
      if max_time == 0:
          page_time = int(time.time())
      else:
          page_time = max_time
      endpoint = 'https://aweme.snssdk.com/aweme/v1/user/following/list/'
      query = dict(
          user_id=user_id,
          max_time=str(page_time),
          count=str(count),
          source_type='1',
      )
      return self.__http_get(endpoint, query)
  def get_user_follower_list(self, user_id, min_time, count):
      """Fetch one page of a user's followers.

      :param user_id: user id
      :param min_time: paging value, sent as ``max_time``; 0 is replaced by
          the current epoch second, otherwise pass the ``min_time`` returned
          by the previous page
      :param count: number of entries to return
      :return: whatever the internal GET helper returns
      """
      if min_time == 0:
          page_time = int(time.time())
      else:
          page_time = min_time
      endpoint = 'https://aweme.snssdk.com/aweme/v1/user/follower/list/'
      query = dict(
          user_id=user_id,
          max_time=str(page_time),
          count=str(count),
      )
      return self.__http_get(endpoint, query)
  def get_user_favorite_list(self, user_id, min_time, count):
      """Fetch one page from the follower-list endpoint.

      NOTE(review): despite its name, this method sends exactly the same
      request as ``get_user_follower_list`` (same URL, same parameters) -
      confirm whether a different endpoint was intended.

      :param user_id: user id
      :param min_time: paging value, sent as ``max_time``; 0 is replaced by
          the current epoch second, otherwise pass the ``min_time`` returned
          by the previous page
      :param count: number of entries to return
      :return: whatever the internal GET helper returns
      """
      if min_time == 0:
          page_time = int(time.time())
      else:
          page_time = min_time
      endpoint = 'https://aweme.snssdk.com/aweme/v1/user/follower/list/'
      query = dict(
          user_id=user_id,
          max_time=str(page_time),
          count=str(count),
      )
      return self.__http_get(endpoint, query)
  def get_hot_search_list(self):
      """Fetch the hot-search ranking.

      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://api.amemv.com/aweme/v1/hot/search/list/?detail_list=1'
      return self.__http_get(endpoint)
  def get_hot_video_list(self):
      """Fetch the hot-video billboard.

      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v1/hotsearch/aweme/billboard/'
      return self.__http_get(endpoint)
  def get_hot_music_list(self):
      """Fetch the hot-music billboard.

      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v1/hotsearch/music/billboard/'
      return self.__http_get(endpoint)
  def get_hot_positive_energy_list(self):
      """Fetch the "positive energy" billboard.

      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v1/hotsearch/positive_energy/billboard/'
      return self.__http_get(endpoint)
  def get_hot_category_list(self, cursor, count):
      """Fetch one page of the hot category list.

      :param cursor: paging cursor; 0 for the first page, afterwards the
          ``cursor`` returned by the previous page
      :param count: number of entries to return
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v1/category/list/'
      query = dict(cursor=str(cursor), count=str(count))
      return self.__http_get(endpoint, query)
  def general_search(self, keyword, offset, count):
      """Run a general (combined) search.

      :param keyword: search keyword
      :param offset: paging offset; 0 for the first page, afterwards the
          ``cursor`` returned by the previous page
      :param count: number of results to return
      :return: whatever the internal POST helper returns
      """
      endpoint = 'https://aweme-hl.snssdk.com/aweme/v1/general/search/single/?'
      form = dict(
          keyword=keyword,
          offset=str(offset),
          count=str(count),
          is_pull_refresh='0',
          hot_search='0',
          latitude='0.0',
          longitude='0.0',
      )
      return self.__http_post(endpoint, form)
  def item_search(self, keyword, offset, count):
      """Search the shopping aggregate endpoint (``search_source`` is
      ``commodity_search``).

      :param keyword: search keyword, sent as ``query``
      :param offset: paging offset, sent as ``cursor``; 0 for the first page,
          afterwards the ``cursor`` returned by the previous page
      :param count: number of results to return
      :return: whatever the internal POST helper returns
      """
      endpoint = 'https://api5-normal-c-hl.amemv.com/aweme/v2/shop/search/aggregate/shopping/?'
      form = dict(
          query=keyword,
          cursor=str(offset),
          count=str(count),
          request_type='1',
          search_filter='1',
          search_source='commodity_search',
          enter_from='homepage_hot',
      )
      return self.__http_post(endpoint, form)
  def video_search(self, keyword, offset, count):
      """Search for videos.

      :param keyword: search keyword
      :param offset: paging offset; 0 for the first page, afterwards the
          ``cursor`` returned by the previous page
      :param count: number of results to return
      :return: whatever the internal POST helper returns
      """
      endpoint = 'https://aweme-hl.snssdk.com/aweme/v1/search/item/?'
      form = dict(
          keyword=keyword,
          offset=str(offset),
          count=str(count),
          is_pull_refresh='0',
          hot_search='0',
          source='video_search',
      )
      return self.__http_post(endpoint, form)
  def user_search(self, keyword, offset, count):
      """Search for users.

      :param keyword: search keyword
      :param offset: paging offset, sent as ``cursor``; 0 for the first page,
          afterwards the ``cursor`` returned by the previous page
      :param count: number of results to return
      :return: whatever the internal POST helper returns
      """
      endpoint = 'https://aweme-hl.snssdk.com/aweme/v1/discover/search/?'
      form = dict(
          keyword=keyword,
          cursor=str(offset),
          count=str(count),
          type='1',
          is_pull_refresh='0',
          hot_search='0',
          source='',
      )
      return self.__http_post(endpoint, form)
  def get_video_comment_list(self, aweme_id, cursor, count):
      """Fetch one page of comments for a video.

      :param aweme_id: video id
      :param cursor: paging cursor; 0 for the first page, afterwards the
          ``cursor`` returned by the previous page
      :param count: number of comments to return
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v2/comment/list/'
      query = dict(
          aweme_id=aweme_id,
          cursor=str(cursor),
          count=str(count),
      )
      return self.__http_get(endpoint, query)
  def get_video_detail(self, aweme_id):
      """Fetch the details of one video.

      :param aweme_id: video id
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v1/aweme/detail/'
      return self.__http_get(endpoint, dict(aweme_id=aweme_id))
  def get_music_detail(self, music_id):
      """Fetch the details of one music track.

      :param music_id: music id (converted to ``str`` before sending)
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v1/music/detail/'
      query = dict(music_id=str(music_id), click_reason='0')
      return self.__http_get(endpoint, query)
  def get_music_videos(self, music_id, cursor, count):
      """Fetch one page of videos that use a given music track.

      :param music_id: music id
      :param cursor: paging cursor; 0 for the first page, afterwards the
          ``cursor`` returned by the previous page
      :param count: number of videos to return
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme.snssdk.com/aweme/v1/music/aweme/'
      query = dict(
          music_id=str(music_id),
          cursor=str(cursor),
          count=str(count),
          type='6',
      )
      return self.__http_get(endpoint, query)
  def get_topic_videos(self, hashtag_name, cursor, count):
      """Fetch one page of videos for a hashtag (challenge).

      :param hashtag_name: hashtag text
      :param cursor: paging cursor; 0 for the first page, afterwards the
          ``cursor`` returned by the previous page
      :param count: number of videos to return
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://aweme-hl.snssdk.com/aweme/v1/challenge/aweme/'
      query = dict(
          cursor=str(cursor),
          count=str(count),
          source='challenge_video',
          hashtag_name=hashtag_name,
          type='5',
          query_type='1',
      )
      return self.__http_get(endpoint, query)
  def get_promotion_list(self, user_id,sec_user_id, cursor, count):
      """Fetch one page of a user's product showcase (promotion list).

      The request carries the shared device parameters in addition to the
      paging fields. They are taken from ``COMMON_DEVICE_PARAMS`` rather than
      being re-typed here, so the two can no longer drift apart.

      :param user_id: user id
      :param sec_user_id: accepted for interface compatibility; not sent
      :param cursor: paging cursor; 0 for the first page, afterwards the
          ``cursor`` returned by the previous page
      :param count: number of entries to return
      :return: whatever the internal GET helper returns
      """
      params = {
          'count': str(count),
          'cursor': str(cursor),
          'user_id': str(user_id),
          # Same keys, values and order as the previously inlined copy.
          **self.COMMON_DEVICE_PARAMS,
      }
      douyin_url = 'https://aweme.snssdk.com/aweme/v1/promotion/user/promotion/list/'
      return self.__http_get(douyin_url, params)
  def get_share_video_detail(self, share_url):
      """Resolve a share link and fetch the details of the video it points to.

      The share URL is requested with a desktop browser User-Agent; the video
      id is taken from the fourth ``/``-separated piece of the final
      (post-redirect) URL path.

      :param share_url: share link
      :return: the result of ``get_video_detail`` for the extracted id
      :raises IndexError: if the final URL path has fewer than four pieces
      """
      browser_headers = {
          'Accept': '*/*',
          'Connection': 'keep-alive',
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
      }
      landing = requests.get(share_url, headers=browser_headers, timeout=20)
      path_pieces = parse.urlparse(landing.url).path.split('/')
      return self.get_video_detail(path_pieces[3])
  def get_webcast_room_info(self, room_id):
      """Fetch information about a live room.

      :param room_id: room id (converted to ``str`` before sending)
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://webcast.amemv.com/webcast/room/info/?'
      query = dict(
          pack_level='4',
          room_id=str(room_id),
          webcast_sdk_version='1710',
      )
      return self.__http_get(endpoint, query)
  def get_webcast_user_info(self, room_id, user_id):
      """Fetch a user's information in the context of a live room.

      :param room_id: room id, sent as ``current_room_id``
      :param user_id: user id, sent as both ``target_uid`` and ``anchor_id``
      :return: whatever the internal GET helper returns
      """
      endpoint = 'https://webcast.amemv.com/webcast/user/'
      query = dict(
          request_from='admin',
          current_room_id=str(room_id),
          target_uid=str(user_id),
          anchor_id=str(user_id),
          packed_level='2',
          webcast_sdk_version='1150',
      )
      return self.__http_get(endpoint, query)
  def get_webcast_ranklist(self, room_id):
      """Fetch the contributor ranking of a live room.

      :param room_id: room id; used both in the URL path and as the
          ``room_id`` parameter
      :return: whatever the internal GET helper returns
      """
      query = dict(
          room_id=str(room_id),
          rank_type='17',
          webcast_sdk_version='1150',
      )
      endpoint = f'https://webcast.amemv.com/webcast/ranklist/room/{room_id!s}/contributor/'
      return self.__http_get(endpoint, query)
  797. def __get_random(self, len):
  798. return ''.join(str(random.choice(range(10))) for _ in range(len))
  def __get_msg(self, resp):
      """Return the ``msg`` field of a JSON response body.

      :param resp: response object exposing the body as ``resp.text``
      :return: value stored under the ``'msg'`` key of the decoded body
      :raises KeyError: if the decoded body has no ``'msg'`` key
      """
      payload = json.loads(resp.text)
      return payload['msg']
  def __get_random_mac(self):
      """Return a random MAC address string with the fixed prefix 10:2a:b3.

      The fourth octet is drawn from 0x00-0x7f, the last two from 0x00-0xff.

      :return: six lowercase two-digit hex octets joined by ':'
      """
      fixed_prefix = [0x10, 0x2a, 0xb3]
      random_tail = [
          random.randint(0x00, 0x7f),
          random.randint(0x00, 0xff),
          random.randint(0x00, 0xff),
      ]
      return ':'.join("%02x" % octet for octet in fixed_prefix + random_tail)
  def __add_other_params(self, douyin_url, params=None):
      """Append the device parameters, extra parameters and timestamps to a URL.

      The query is assembled in this order: the URL as given (a '?' is added
      when it has none), the url-encoded device parameters, the url-encoded
      ``params`` (only when non-empty), then ``_rticket`` (milliseconds) and
      ``ts`` (seconds) taken from the current time.

      :param douyin_url: base URL, with or without an existing query string
      :param params: extra query parameters; ``None`` means no extras
      :return: the complete URL string
      """
      extras = {} if params is None else params
      if '?' not in douyin_url:
          douyin_url += '?'
      device_query = parse.urlencode(self.__device_params)
      # Only insert a separator when the URL does not already end with one.
      joiner = '' if douyin_url.endswith(('?', '&')) else '&'
      pieces = [douyin_url, joiner, device_query]
      if len(extras) > 0:
          pieces.append('&' + parse.urlencode(extras))
      pieces.append("&_rticket=" + str(int(round(time.time() * 1000))))
      pieces.append("&ts=" + str(int(time.time())))
      return ''.join(pieces)
  def __get_cookie(self):
      """Render the stored cookies as a single ``name=value; ...`` string.

      :return: the cookie pairs joined by '; ', or '' when no cookie is stored
      """
      jar = self.__cookie
      if len(jar) == 0:
          return ''
      pairs = []
      for name, value in jar.items():
          pairs.append(str(name) + "=" + str(value))
      return "; ".join(pairs)
  def get_webcast_feed(self):
      """Fetch the webcast feed.

      No endpoint-specific parameters are sent; an empty mapping is passed on.

      :return: result of the signed GET request issued via ``__http_get``
      """
      endpoint = 'https://webcast.amemv.com/webcast/feed/?'
      return self.__http_get(endpoint, {})
  def __get_sign(self, url, form_params=None):
      """Build the signature headers for a request.

      The returned mapping always contains ``X-Khronos`` (current time in
      seconds, as a string), ``X-Gorgon`` (taken from the result of
      ``X_Gorgon``) and an empty ``X-Pods``. When ``form_params`` is
      non-empty, ``X-SS-STUB`` is added: the upper-cased MD5 hex digest of
      the url-encoded form.

      :param url: full request URL; everything after the first '?' is signed
      :param form_params: form fields of a POST body, if any
      :return: dict of signature headers
      :raises ValueError: if ``url`` contains no '?'
      """
      body_digest = ''
      if form_params:
          encoded_form = parse.urlencode(form_params)
          body_digest = hashlib.md5(encoded_form.encode('utf-8')).hexdigest()
      timestamp = int(time.time())
      # Imported at call time, exactly as the original code did.
      from libs.Xg04 import X_Gorgon
      query_string = url[url.index('?') + 1:]
      # NOTE(review): ``cookie`` is neither a parameter nor a local of this
      # method, so it must resolve to a module-level name at call time.
      # Confirm that one is defined (self.__cookie may have been intended).
      gorgon = X_Gorgon(query_string, "", cookie)['X-Gorgon']
      signature = {
          'X-Khronos': str(timestamp),
          'X-Gorgon': gorgon,
          'X-Pods': '',
      }
      if body_digest:
          signature['X-SS-STUB'] = body_digest.upper()
      return signature
  def __get_headers(self, sign=None):
      """Build the request headers, optionally merged with signature headers.

      :param sign: extra headers (normally the output of ``__get_sign``);
          ``None`` means no extras
      :return: dict with ``User-Agent``, ``X-SS-REQ-TICKET`` (current time in
          milliseconds, as a string) and every entry of ``sign``
      """
      merged = {
          'User-Agent': DouYinApi.USER_AGENT,
          'X-SS-REQ-TICKET': str(round(time.time() * 1000)),
      }
      # Entries of ``sign`` override the defaults on key collision.
      merged.update({} if sign is None else sign)
      return merged
  def __http_get(self, url, query_params=None):
      """Send a signed GET request and return the decoded JSON body.

      The URL is completed by ``__add_other_params`` and signed by
      ``__get_sign``. The stored cookies are sent along, and cookies set by
      the response are merged back into the stored cookies. The request uses
      a 20 second timeout with certificate verification disabled, and goes
      through ``self.proxies`` when that is set.

      :param url: endpoint URL
      :param query_params: endpoint-specific query parameters, if any
      :return: the response body decoded with ``resp.json()``
      """
      full_url = self.__add_other_params(url, {} if query_params is None else query_params)
      signature = self.__get_sign(full_url)
      transport_options = {'verify': False, 'timeout': 20}
      if self.proxies:
          transport_options['proxies'] = self.proxies
      response = requests.get(
          full_url,
          headers=self.__get_headers(signature),
          cookies=self.__cookie,
          **transport_options
      )
      fresh_cookies = response.cookies.get_dict()
      if fresh_cookies:
          self.__cookie.update(fresh_cookies)
      return response.json()
  def __http_post(self, url, form_params=None):
      """Send a signed form POST request and return the raw response text.

      Only the common parameters are appended to the URL; ``form_params``
      travel in the request body and are included in the signature. The
      stored cookies are sent along, and cookies set by the response are
      merged back into the stored cookies. The request uses a 20 second
      timeout with certificate verification disabled, and goes through
      ``self.proxies`` when that is set.

      :param url: endpoint URL
      :param form_params: form fields to post, if any
      :return: the response body as text (not decoded as JSON)
      """
      form = {} if form_params is None else form_params
      full_url = self.__add_other_params(url)
      signature = self.__get_sign(full_url, form)
      transport_options = {'verify': False, 'timeout': 20}
      if self.proxies:
          transport_options['proxies'] = self.proxies
      response = requests.post(
          full_url,
          headers=self.__get_headers(signature),
          data=form,
          cookies=self.__cookie,
          **transport_options
      )
      fresh_cookies = response.cookies.get_dict()
      if fresh_cookies:
          self.__cookie.update(fresh_cookies)
      return response.text
  def comm_get(self, douyin_url, params=None, need_comm_param=True):
      """Send a GET request to an arbitrary endpoint.

      With ``need_comm_param`` true the call is delegated to ``__http_get``
      (common parameters appended, JSON-decoded result). Otherwise only the
      url-encoded ``params`` are appended to ``douyin_url`` and the raw
      response text is returned.

      :param douyin_url: endpoint URL; in the ``need_comm_param=False`` case
          the encoded params are concatenated directly, so it should already
          end with '?' or '&'
      :param params: query parameters; ``None`` means none (a fresh dict is
          used per call instead of a shared mutable default)
      :param need_comm_param: whether to append the common device parameters
      :return: decoded JSON when ``need_comm_param`` is true, otherwise the
          response text
      """
      if params is None:
          params = {}
      if need_comm_param:
          return self.__http_get(douyin_url, params)

      url = douyin_url + parse.urlencode(params)
      # NOTE(review): the signature is computed from ``douyin_url`` (without
      # the encoded params), as in the original code; confirm this is intended.
      sign = self.__get_sign(douyin_url)
      transport_options = {'verify': False, 'timeout': 20}
      if self.proxies:
          transport_options['proxies'] = self.proxies
      resp = requests.get(
          url,
          headers=self.__get_headers(sign),
          cookies=self.__cookie,
          **transport_options
      )
      cookie = resp.cookies.get_dict()
      if cookie:
          self.__cookie.update(cookie)
      return resp.text
  def get_room_product_list(self, room_id, anchor_id, sec_author_id):
      """Fetch the products promoted in a live room.

      :param room_id: id of the room; converted to ``str`` before sending
      :param anchor_id: id of the anchor; converted to ``str`` before sending
      :param sec_author_id: sent unchanged as ``sec_author_id``
      :return: result of the signed GET request issued via ``__http_get``
      """
      endpoint = 'https://lianmengapi.snssdk.com/live/promotions/?'
      query = dict(
          room_id=str(room_id),
          anchor_id=str(anchor_id),
          sec_author_id=sec_author_id,
      )
      return self.__http_get(endpoint, query)
  def get_product_info(self, promotion_id, product_id, author_id):
      """Fetch the information of a product.

      The three ids are posted unchanged as form fields.

      :param promotion_id: sent as ``promotion_id``
      :param product_id: sent as ``product_id``
      :param author_id: sent as ``author_id``
      :return: result of the signed POST request issued via ``__http_post``
      """
      endpoint = 'https://api3-normal-c-lf.amemv.com/aweme/v2/shop/promotion/'
      form = dict(
          promotion_id=promotion_id,
          product_id=product_id,
          author_id=author_id,
      )
      return self.__http_post(endpoint, form)
  def get_room_product_campaign(self, promotion_id):
      """Fetch the campaign information of a promoted product in a live room.

      :param promotion_id: converted to ``str`` and sent as ``promotion_ids``
      :return: result of the signed GET request issued via ``__http_get``
      """
      endpoint = 'https://lianmengapi.snssdk.com/live/promotion/campaign/?'
      query = dict(promotion_ids=str(promotion_id))
      return self.__http_get(endpoint, query)
  def getDevice(self):
      """Register a device and return its ``(device_id, iid)`` pair.

      When registration reports a device id of ``'0'``, a hard-coded fallback
      pair is returned and nothing else happens. Otherwise the new ids are
      installed through ``init_device_ids`` and an ``app_alert`` request is
      sent before the pair is returned.

      :return: tuple ``(device_id, iid)``
      :raises KeyError: if the registration result lacks one of the keys
          ``device_id``, ``iid``, ``uuid``, ``openudid`` or ``cookie``
      """
      registration = self.register_device()
      wanted = ('device_id', 'iid', 'uuid', 'openudid', 'cookie')
      # The cookie is read (so a missing key still raises) but not used here.
      device_id, iid, udid, openudid, _cookie = [registration[field] for field in wanted]
      if device_id == '0':
          return '2532904814256759', '3166223513429303'
      self.init_device_ids(device_id, iid, udid, openudid)
      alert_params = {"has_market": "0", "is_activated": '0'}
      self.comm_get('https://aweme.snssdk.com/service/2/app_alert/?', alert_params)
      return device_id, iid
  if __name__ == '__main__':
      # Script entry point: for each of 30 proxies, try up to 10 times to
      # register a device, activate it, and push the resulting identity record
      # (fields joined by '@@@') onto two Redis lists.
      import warnings

      warnings.filterwarnings("ignore")
      for i in range(30):
          proxy = Proxy.get_yuanrenyun_list()
          print(proxy)
          # Both schemes are routed through the same HTTP proxy.
          proxies = {scheme: "http://" + proxy for scheme in ("http", "https")}
          douApi = DouYinApi('', proxies)
          for attempt in range(10):
              try:
                  result = douApi.register_device()
                  print(result)
                  device_id, iid, udid, openudid, cookie, cdid = [
                      result[field]
                      for field in ('device_id', 'iid', 'uuid', 'openudid', 'cookie', 'clientudid')
                  ]
                  if device_id == '0':
                      # Registration did not yield a usable device; try again.
                      continue
                  douApi.init_device_ids(device_id, iid, udid, openudid)
                  params = {"has_market": "0", "is_activated": '0'}
                  douApi.comm_get('https://aweme.snssdk.com/service/2/app_alert/?', params)
                  server_time = str(int(time.time()))
                  data = '@@@'.join([
                      device_id, iid, openudid, udid, cookie, server_time,
                      cdid, proxy, V1, V2, device_type, device_brand,
                  ])
                  print(data)
                  key = 'DOUYIN_SCRAPE_DID_IID_TTREQ_0414'
                  redis = DbRedis.kwai_connect().lpush(key, data)
                  # NOTE(review): the original tested the inner loop index
                  # here (it reused the name ``i``), so this condition is
                  # never true and the '...1221C' key is never used. Confirm
                  # whether the outer index was intended.
                  key = ('DOUYIN_SCRAPE_DID_IID_TTREQ_1221C' if attempt > 10
                         else 'DOUYIN_SCRAPE_DID_IID_TTREQ_1221')
                  redis = DbRedis.kwai_connect().lpush(key, data)
              except Exception as e:
                  # Best effort: report the failure and move on to the next attempt.
                  print(e)