店播爬取Python脚本

en0414.py 40KB

(Line-number gutter 1–1143 from the original code viewer omitted; the listing follows.)
  1. import requests
  2. import uuid
  3. import random
  4. import time
  5. import json
  6. import hashlib
  7. from libs.aesgzip import tt_encrypt
  8. from urllib import parse
  9. from libs.proxy import Proxy
  10. from rds_model.db_redis import DbRedis
  11. def get_mc():
  12. def a():
  13. seed = "1234567890ABCDEF"
  14. sa = []
  15. for i in range(2):
  16. sa.append(random.choice(seed))
  17. salt = ''.join(sa)
  18. return salt
  19. k = ''
  20. for i in range(6):
  21. k += a() + ':'
  22. return k[:-1]
  23. def get_random(i, random_type=1):
  24. if random_type == 1:
  25. return str(random.randint(1 * 10 ** (i - 1), 1 * 10 ** i - 1))
  26. elif random_type == 8:
  27. seed = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  28. sa = []
  29. for i in range(i):
  30. sa.append(random.choice(seed))
  31. salt = ''.join(sa)
  32. return salt
  33. else:
  34. seed = "1234567890abcde"
  35. sa = []
  36. for i in range(i):
  37. sa.append(random.choice(seed))
  38. salt = ''.join(sa)
  39. return salt
  40. def get_random_brand_type():
  41. brand_type = get_random(3, random_type=8) + '-' + get_random(2, random_type=8) + '00'
  42. return brand_type
  43. V1 = str(random.randint(5,8))
  44. V2 = str(random.randint(0,9))
  45. channel = 'oppo'
  46. device_type_list = ['VTR-AL00', 'KNT-AL10', 'PRA-AL00', 'DUA-TL00', 'PLK-AL10', 'KIW-AL10', 'LON-AL00', 'ANA-AN00',
  47. 'JSC-AN00', 'TAS-AN00', 'TAS-AL00']
  48. # device_type = 'GRL-CL00'
  49. device_type = get_random_brand_type()
  50. device_brand = 'HUAWEI'
  51. # Y79, Y66i, V7, X20, X20Plus, V7+, X9s L,V5s,Y66,Y30
  52. # device_type = random.choice(device_type_list)
  53. # channel_list = ['huawei', 'googleplay', 'douyin-huidu-guanwang-test', 'douyin_tengxun_wzl',
  54. # 'wandoujia_zhiwei', 'update']
  55. # channel = random.choice(channel_list)
  56. # print(channel, device_type)
  57. cookie = ''
  58. class DouYinApi:
  59. #USER_AGENT = 'okhttp/3.10.0.1'
  60. USER_AGENT = f'com.ss.android.ugc.aweme/{V1}{V2}0 (Linux; U; Android 5.1.1; zh_CN; {device_type}; Build/LMY47V; Cronet/58.0.2991.0)'
  61. COMMON_DEVICE_PARAMS = {
  62. 'retry_type': 'no_retry',
  63. 'ac': '4g',
  64. 'channel': channel,
  65. 'aid': '2329',
  66. 'app_name': 'aweme',
  67. 'version_code': f'{V1}{V2}0',
  68. 'version_name': f'{V1}.{V2}.0',
  69. 'device_platform': 'android',
  70. 'ssmix': 'a',
  71. 'device_type': device_type,
  72. 'device_brand': device_brand,
  73. 'language': 'zh',
  74. 'os_api': '25',
  75. 'os_version': '7.1.2',
  76. 'manifest_version_code': f'{V1}{V2}0',
  77. 'resolution': '720*1280',
  78. 'dpi': '192',
  79. 'update_version_code': f'{V1}{V2}02',
  80. 'mcc_mnc': '46000'
  81. }
  82. PROXY = {}
  83. def __init__(self, sessionid, proxies):
  84. """
  85. :param cid: client id
  86. """
  87. self.proxies = proxies
  88. self.__cid = ''
  89. self.__device_id = ''
  90. self.__iid = ''
  91. self.__uuid = ''
  92. self.__openudid = ''
  93. self.__device_params = {}
  94. self.__cookie = {
  95. }
  96. def init_device_ids(self, device_id, iid, udid, openudid, cc=None):
  97. """初始化设备id参数
  98. :param device_id: device id
  99. :param iid: install id
  100. :param udid: imei
  101. :param openudid: open udid
  102. :param serial_number: serial no
  103. :param clientudid: client udid
  104. :param sim_serial_number: sim serial number
  105. :param mc: mac address
  106. :return: none
  107. """
  108. self.__device_id = device_id
  109. self.__iid = iid
  110. self.__uuid = udid
  111. self.__openudid = openudid
  112. device_ids = {
  113. 'uuid': udid,
  114. 'openudid': openudid
  115. }
  116. if device_id and iid:
  117. device_ids.update({
  118. 'device_id': device_id,
  119. 'iid': iid,
  120. })
  121. self.__device_params = self.COMMON_DEVICE_PARAMS.copy()
  122. self.__device_params.update(device_ids)
  123. if cc:
  124. self.__cookie.update(cc)
  125. def __get_encrypted_device_info(self, device_id, openudid, udid, clientudid, serial_number, mac, iid):
  126. register_info = {
  127. "magic_tag": "ss_app_log",
  128. "header": {
  129. "display_name": "抖音短视频",
  130. "update_version_code": int(self.COMMON_DEVICE_PARAMS['update_version_code']),
  131. "manifest_version_code": int(self.COMMON_DEVICE_PARAMS['manifest_version_code']),
  132. "aid": '2329',
  133. "channel": self.COMMON_DEVICE_PARAMS['channel'],
  134. "appkey": "57bfa27c67e58e7d920028d3",
  135. "package": "com.ss.android.ugc.aweme",
  136. "app_version": self.COMMON_DEVICE_PARAMS['version_name'],
  137. "version_code": int(self.COMMON_DEVICE_PARAMS['version_code']),
  138. "sdk_version": "2.5.5.8",
  139. "os": "Android",
  140. "os_version": self.COMMON_DEVICE_PARAMS['os_version'],
  141. "os_api": self.COMMON_DEVICE_PARAMS['os_api'],
  142. "device_model": self.COMMON_DEVICE_PARAMS['device_type'],
  143. "device_brand": self.COMMON_DEVICE_PARAMS['device_brand'],
  144. "device_manufacturer": self.COMMON_DEVICE_PARAMS['device_brand'],
  145. "cpu_abi": "armeabi-v7a",
  146. "build_serial": serial_number,
  147. "release_build": "fbfabbc_20190522",
  148. "density_dpi": self.COMMON_DEVICE_PARAMS['dpi'],
  149. "display_density": "mdpi",
  150. "resolution": "1280x720",
  151. "language": "zh",
  152. "mc": mac,
  153. "timezone": 8,
  154. "access": "4G",
  155. "not_request_sender": 0,
  156. "rom": "EMUI-rel.se.infra.20191210.155925",
  157. "rom_version": "HUAWEI-user 7.1.2 20171130.276299 release-keys",
  158. "sig_hash": "aea615ab910015038f73c47e45d21466",
  159. # "device_id": str(device_id),
  160. "openudid": str(openudid),
  161. "udid": str(udid),
  162. "clientudid": str(clientudid),
  163. "serial_number": str(serial_number),
  164. "sim_serial_number": [
  165. ],
  166. "region": "CN",
  167. "tz_name": "Asia/Shanghai",
  168. "tz_offset": 28800
  169. },
  170. "_gen_time": str(round(time.time() * 1000))
  171. }
  172. if device_id:
  173. register_info['header']['device_id'] = str(device_id)
  174. if iid:
  175. register_info['header']['iid'] = str(iid)
  176. register_info['header']['push_sdk'] = '[1, 2, 6, 7, 8, 9]'
  177. return tt_encrypt((json.dumps(register_info)))
  178. def log_settings(self, ids):
  179. device_id = ids['device_id']
  180. openudid = ids['openudid']
  181. udid = ids['uuid']
  182. clientudid = ids['clientudid']
  183. serial_number = ids['serial_number']
  184. iid = ids['iid']
  185. mc = ids['mc']
  186. d = self.__get_encrypted_device_info(device_id, openudid, udid, clientudid, serial_number, mc, iid)
  187. url = 'https://log.snssdk.com/service/2/log_settings/?'
  188. url = self.__add_other_params(url, {})
  189. sign = self.__get_sign(url)
  190. if self.proxies:
  191. resp = requests.post(url, headers=self.__get_headers(sign), cookies=self.__cookie, proxies=self.proxies,
  192. data=d, verify=False, timeout=20)
  193. else:
  194. resp = requests.get(url, headers=self.__get_headers(sign), cookies=self.__cookie, data=d, verify=False, timeout=20)
  195. # print(resp.text)
  196. def xlog(self, device_id, iid, udid, openudid, serial_number):
  197. xlog = {
  198. "dpod": {
  199. "pod": ""
  200. },
  201. "p1": f"{device_id}",
  202. "p2": f"{iid}",
  203. "ut": 72,
  204. "ait": int(time.time() - 72),
  205. "pkg": "com.ss.android.ugc.aweme",
  206. "fp": "HUAWEI/HUAWEI/VOG-AL00:5.1.1/20171130.376229:user/release-keys",
  207. "vc": 0,
  208. "vpn": 0,
  209. "wifisid": "",
  210. "wifimac": "",
  211. "wifip": "",
  212. "aplist": [
  213. ],
  214. "route": {
  215. "iip": "0",
  216. "gip": "0",
  217. "ghw": "0",
  218. "type": "0"
  219. },
  220. "location": "",
  221. "apps": ["com.microvirt.guide", "com.android.browser", "com.ss.android.ugc.aweme"],
  222. "mdi_if": {
  223. },
  224. "extra": "install",
  225. "hw": {
  226. "brand": self.COMMON_DEVICE_PARAMS['device_brand'],
  227. "model": self.COMMON_DEVICE_PARAMS['device_type'],
  228. "board": self.COMMON_DEVICE_PARAMS['device_brand'],
  229. "device": self.COMMON_DEVICE_PARAMS['device_type'],
  230. "product": self.COMMON_DEVICE_PARAMS['device_type'],
  231. "display": "720*1280",
  232. "dpi": 0,
  233. "bat": 1000,
  234. "cpu": {
  235. "core": 2,
  236. "hw": "placeholder",
  237. "max": "2400000",
  238. "min": "1000000",
  239. "ft": "neon vfp swp half thumb fastmult edsp vfpv3"
  240. },
  241. "mem": {
  242. "ram": "2120286208",
  243. "rom": "8320901120"
  244. }
  245. },
  246. "id": {
  247. "i": 22,
  248. "r": "5.1.1",
  249. "imei": f"{udid}",
  250. "imsi": '46000' + str(self.__get_random(10)),
  251. "adid": f"{openudid}",
  252. "adid_ex": f"{openudid}",
  253. "mac": '',
  254. "serial": serial_number
  255. },
  256. "emulator": {
  257. },
  258. "env": {
  259. "ver": "0.6.02.17",
  260. "tag": "default",
  261. "pkg": "com.ss.android.ugc.aweme",
  262. "tz": "GMT+08:00",
  263. "ml": "zh_CN",
  264. "uid": 10045,
  265. "rebuild": 0,
  266. "jd": 0,
  267. "dbg": -1,
  268. "tid": 0,
  269. "xposed": 0,
  270. "hk": [
  271. "__system_property_get",
  272. "fopen",
  273. "fgets",
  274. "fread",
  275. "getpid",
  276. "gettid",
  277. "dlopen",
  278. "dlsym",
  279. "dladdr"],
  280. "su": 0,
  281. "sp": "/system/xbin/su",
  282. "ro.secure_s": "1",
  283. "ro.debuggable_s": "0",
  284. "click": "",
  285. "hph": "",
  286. "hpp": "",
  287. "mc": 0,
  288. "fc": 946933748,
  289. "jexp": 0,
  290. "frida": 0,
  291. "cydia": 0,
  292. "vapp": ""
  293. },
  294. "extension": {
  295. "bt": -1,
  296. "notify": 2212858197,
  297. "sign": "AEA615AB910015038F73C47E45D21466",
  298. "bytes64": "Aa1IrwSQLRXfmUvJokUpTkHJ9Yh1G4lpVgZSOaZeri+QVgSy80D5ID5UbH6x6xcSHsdjXqljImqg/LxoSbAeyqYZ67ZUTE23KmeyzHjTOeTtGogSEQY2/D8UWmY7Xt815Ch96RiLEgmzqnzCGZ0/N2AAaS5FynrMMfNv9jPlZgcZBjlWv5Ept44wrLJ+G1rJ7CzjL567O2ClTXKGTbHDTZsh91lWXmcNKzsh9PReifo2kf9f2MvJnJ9mN80soe7hw+/96olR5tTH6yVpsbuJt0NjxXI+y5yKp+GLueKCeYbG4QIqqI4buMEsjnPQCOm1bZtYxqeevC1aJVlNMdoQSZjZ7xqsWk8cNBuXqe/XXQua4QIqqI4buMEsjnPQCOm1bWat8qmH4OhJGLtb/aaNzHlXbo7P3DIJUsaOqMvCZvAN5apd9qaCQuvbISV7KJveRYfCRQGc0wiPPEaBEF7iO6qDRjlTDdNocBDAc6VdJS27EXEPHFPIH40PlLnx7vTDN6kUtGcG6G07pnlwwdAp6VZJskS5EmTY0MWyrPBOzBug+bRJCza4pcoC5A8ALNOUc39lxIAX7NuMJcf5rmTmkg8asiKpeeQHm2lSK5YNJkldp4+JLVggF/F+Dr6JjhyhERFxDxxTyB+ND5S58e70wzcNUOIjqUHt95eTwSOIFJvTZ+ZxXM4kDOGCeZXOBoqAfUbYgYfGrRZtcMdw1Lra0HN713ei+bcfoILrHoJO4fS3ZEq1jSU2d8XDEIid38oMwYrPyzXQOAQ/eRZlCiMSW83LMCj/4N0KpL/qlOzmB0uiBqKuF73Qhd0IfQTPpVCrwO6P/upwe1Q8yj+kXQBntfARcQ8cU8gfjQ+UufHu9MM3QpsZRTCVwNPhHp7m8lPb3/6YAZdknQzHMOVZp/LM1kf9MvhPN3heC2rp/QwWY1p+IPPe4wuRVVSO2vyJ8PhyDQxltLZjJW23706uyu+wGLFBiE7+4WBKMHWO3JmR6TWuHglclf+zh1CT21Wy5n/3Ex732O0WkYD01c/U27BcQ2Gb5RZdXYfhXTJe6VIzQnf02PCccLK2HnTdZ6tZ4qquKhkB0gKbMhtx/V5sBYe38cgU+5ct4Zy3fRwT/i5HkB2Wh6UlY2Ty1vIfaakVcR9dC7TKHDAWLIO0CPgBDeZFdb1zfIGyz50R3D9FNBoXkK6o5MRSDrM+pK5mlxZPJ0n56WiqDTEjAaxwEDWLejQFgyczb8DTeJnn5+nvyP/5jmfdv9sb7e8tpPlu02HsOtNzRLZRdpI3mgFYLPF0AJz2DHf90TVVUkETnF3ts+CqH34sIKT0zmoYjvpKGzdznMgRkJMPB3/Q4n/SZqJYVx9Bae0yYyFhtXt6XCdLgBPreyCy57dyB/svbMR4tvQ6K6W28yo9zGrt6gp20h6UzBfquQkOfB1SPLAqdyCyEcbja+ew+LgOejtotPR6lPJ7gUDbIoBURxkdLpINVc0RirdDBYeDaH3sZPli2YneQthTQCW91O6bRzlAEbljd7NTi3OomYuf+aUu8Y5rTjnyJAig+jSORIsyBHYeyrnBuAbFZUsvC5yRvAfEf6Uf1KWgGjcplhfxnBb4ocrYARhO5NCkCRg66GqLCXUDUpm2Gh2mN17vPXAF2ZH+W/hUtmsTs5zkJwks+vnWRDGs9QZWzkUUd2A/eHRxxHqx51yqZKT6v6uuF716rYsthonl01CqAJe7q0jq//hm7ZwCwWyY+GxkxON9PRgo36h0MOgFSURMf/C8pOUz21p3kXrPxmoPxXrisOD5U3oCiOe01fWZoxtAHCzgQpWtnDwqPVxohTwXyShv",
  299. },
  300. "rl": {
  301. },
  302. "ssp": {
  303. },
  304. "grilock": "eyJvcyI6IkFuZHJvaWQiLCJ2ZXJzaW9uIjoiMS4wLjAiLCJ0b2tlbl9pZCI6IiIsImNvZGUiOjUwNH0="
  305. }
  306. s = (json.dumps(xlog))
  307. url = f"https://xlog.snssdk.com/v2/r?os=0&ver=0.6.02.17&m=1&app_ver=5.5.0&region=CN&aid=2329&did={device_id}"
  308. data = {"post_d": s, "url": url}
  309. r = requests.post("http://192.168.77.186:18400/xlogr", data=str(data), timeout=20).text
  310. # print(r)
  311. def register_device(self):
  312. udid = '8678' + get_random(11)
  313. serial_number = "71623411"
  314. openudid = 'c2a5' + str(uuid.uuid4())[-12:]
  315. clientudid = str(uuid.uuid4())
  316. mc = get_mc()
  317. """获取设备信息,传参数注册老设备,不传参数注册新设备
  318. :return:
  319. """
  320. # serial_number = str(uuid.uuid4())[-12:]
  321. # if openudid is None:
  322. # openudid = 'fa23' + str(uuid.uuid4())[-12:]
  323. # clientudid = str(uuid.uuid4())
  324. # if udid is None:
  325. # udid = '861' + self.__get_random(12)
  326. # mc = self.__get_random_mac()
  327. # if device_id is None:
  328. # device_id = '825739' + self.__get_random(9)
  329. params = {
  330. 'uuid': udid,
  331. 'openudid': openudid,
  332. '_rticket': str(int(round(time.time() * 1000)))
  333. }
  334. params.update(self.COMMON_DEVICE_PARAMS)
  335. device_register_url = 'https://log.snssdk.com/service/2/device_register/?' + parse.urlencode(params)
  336. headers = {
  337. 'User-Agent': DouYinApi.USER_AGENT
  338. }
  339. d = self.__get_encrypted_device_info(None, openudid, udid, clientudid, serial_number, mc, iid=None)
  340. if self.proxies:
  341. resp = requests.post(device_register_url,
  342. data=d, proxies=self.proxies,
  343. headers=headers, verify=False, timeout=20)
  344. else:
  345. resp = requests.post(device_register_url,
  346. data=d,
  347. headers=headers, verify=False, timeout=20)
  348. cookie = resp.cookies.get_dict()
  349. if len(cookie) != 0:
  350. self.__cookie.update(cookie)
  351. # print(self.__cookie)
  352. resp = resp.json()
  353. ids = {
  354. 'new_user': resp['new_user'],
  355. 'device_id': str(resp['device_id']),
  356. 'iid': str(resp['install_id']),
  357. 'uuid': udid,
  358. 'openudid': openudid,
  359. 'serial_number': serial_number,
  360. 'clientudid': clientudid,
  361. 'mc': mc,
  362. 'cookie': parse.urlencode(self.__cookie)
  363. }
  364. return ids
  365. def app_log(self):
  366. params = {
  367. 'uuid': udid,
  368. 'openudid': openudid,
  369. '_rticket': str(int(round(time.time() * 1000)))
  370. }
  371. params.update(self.COMMON_DEVICE_PARAMS)
  372. device_register_url = 'https://log.snssdk.com/service/2/app_log/?' + parse.urlencode(params)
  373. headers = {
  374. 'User-Agent': DouYinApi.USER_AGENT
  375. }
  376. data = {"device_info": device_info, 'url': device_register_url, 'ck': ''}
  377. gorgon = requests.post("http://192.168.77.186:18400/app_log", data=str(data), timeout=20).json()
  378. d = tt_encrypt(device_info)
  379. if self.proxies:
  380. resp = requests.post(device_register_url,
  381. data=d, proxies=self.proxies,
  382. headers=headers, verify=False, timeout=20)
  383. else:
  384. resp = requests.post(device_register_url,
  385. data=d,
  386. headers=headers, verify=False, timeout=20)
  387. cookie = resp.cookies.get_dict()
  388. if len(cookie) != 0:
  389. self.__cookie.update(cookie)
  390. resp = resp.json()
  391. # print(resp)
  392. return
  393. def get_feed(self):
  394. """获取首页推荐列表
  395. """
  396. douyin_url = 'https://aweme-eagle.snssdk.com/aweme/v1/feed/?type=0&max_cursor=0&min_cursor=-1&count=6&volume=0.0&pull_type=2&need_relieve_aweme=0&filter_warn=0&req_from&is_cold_start=0'
  397. return self.__http_get(douyin_url)
  398. def get_nearby_feed(self, city_id):
  399. """获取对应城市的推荐列表
  400. :param cityid: 城市代码, 从https://wenku.baidu.com/view/af4281bafd0a79563c1e7287.html获取
  401. :return:
  402. """
  403. params = {
  404. 'city': city_id,
  405. }
  406. douyin_url = 'https://api.amemv.com/aweme/v1/nearby/feed/?max_cursor=0&min_cursor=0&count=20&feed_style=1&filter_warn=0&poi_class_code=0'
  407. return self.__http_get(douyin_url, params)
  408. def get_user_info(self, user_id):
  409. """获取用户信息
  410. :param user_id: 用户ID
  411. :return:
  412. """
  413. params = {
  414. 'user_id': user_id
  415. }
  416. douyin_url = 'https://aweme-eagle.snssdk.com/aweme/v1/user/?'
  417. return self.__http_get(douyin_url, params)
  418. def get_room_flv(self, room_id):
  419. """获取用户信息
  420. :param user_id: 用户ID
  421. :return:
  422. """
  423. params = {
  424. 'room_id': room_id,
  425. 'live_id':'1'
  426. }
  427. douyin_url = 'https://webcast-hl.amemv.com/webcast/room/reflow/info/?'
  428. return self.__http_get(douyin_url, params)
  429. def get_user_post(self, user_id, max_cursor, count):
  430. """获取用户作品
  431. :param user_id: 用户ID
  432. :param max_cursor: 用于分页,第1页是0,后1页是上1页请求的时候返回的max_cursor
  433. :param count: 返回视频的条数
  434. :return:
  435. """
  436. params = {
  437. 'user_id': user_id,
  438. 'max_cursor': str(max_cursor),
  439. 'count': str(count)
  440. }
  441. douyin_url = 'https://aweme.snssdk.com/aweme/v1/aweme/post/'
  442. return self.__http_get(douyin_url, params)
  443. def comm_get(self, douyin_url, params):
  444. """获取用户信息
  445. :param user_id: 用户ID
  446. :return:
  447. """
  448. return self.__http_get(douyin_url, params)
  449. def get_sec_user_favorite(self, sec_user_id, max_cursor, count):
  450. """获取用户作品
  451. :param user_id: 用户ID
  452. :param max_cursor: 用于分页,第1页是0,后1页是上1页请求的时候返回的max_cursor
  453. :param count: 返回视频的条数
  454. :return:
  455. """
  456. params = {
  457. 'sec_user_id': sec_user_id,
  458. 'max_cursor': str(max_cursor),
  459. 'count': str(count)
  460. }
  461. douyin_url = 'https://aweme-hl.snssdk.com/aweme/v1/aweme/favorite/?invalid_item_count=0&is_hiding_invalid_item=0&'
  462. return self.__http_get(douyin_url, params)
  463. def get_user_favorite(self, user_id, max_cursor, count):
  464. """获取用户作品
  465. :param user_id: 用户ID
  466. :param max_cursor: 用于分页,第1页是0,后1页是上1页请求的时候返回的max_cursor
  467. :param count: 返回视频的条数
  468. :return:
  469. """
  470. params = {
  471. 'user_id': user_id,
  472. 'max_cursor': str(max_cursor),
  473. 'count': str(count)
  474. }
  475. douyin_url = 'https://aweme-hl.snssdk.com/aweme/v1/aweme/favorite/?invalid_item_count=0&is_hiding_invalid_item=0&'
  476. return self.__http_get(douyin_url, params)
  477. def get_user_forward_list(self, user_id, max_cursor, count):
  478. """获取用户动态
  479. :param user_id: 用户ID
  480. :param max_cursor: 用于分页,第1页是0,后1页是上1页请求的时候返回的max_cursor
  481. :param count: 每次返回的动态条数
  482. :return:
  483. """
  484. params = {
  485. 'user_id': user_id,
  486. 'max_cursor': str(max_cursor),
  487. 'count': str(count)
  488. }
  489. douyin_url = 'https://aweme.snssdk.com/aweme/v1/forward/list/'
  490. return self.__http_get(douyin_url, params)
  491. def get_user_following_list(self, user_id, max_time, count):
  492. """获取用户关注列表 注意:关注列表请求太频繁会导致不返回数据
  493. :param user_id: 用户ID
  494. :param max_time: 用于分页,第1页是0,后1页是上1页请求时返回的min_time
  495. :param count: 每次返回的条数
  496. :return:
  497. """
  498. params = {
  499. 'user_id': user_id,
  500. 'max_time': str(int(time.time()) if max_time == 0 else max_time),
  501. 'count': str(count),
  502. 'source_type': '1'
  503. }
  504. douyin_url = 'https://aweme.snssdk.com/aweme/v1/user/following/list/'
  505. return self.__http_get(douyin_url, params)
  506. def get_user_follower_list(self, user_id, min_time, count):
  507. """获取用户粉丝列表
  508. :param user_id: 用户ID
  509. :param min_time: 用于分页,第1页是0,后1页是上1页请求时返回的min_time
  510. :param count: 每次返回的条数
  511. :return:
  512. """
  513. params = {
  514. 'user_id': user_id,
  515. 'max_time': str(int(time.time()) if min_time == 0 else min_time),
  516. 'count': str(count)
  517. }
  518. douyin_url = 'https://aweme.snssdk.com/aweme/v1/user/follower/list/'
  519. return self.__http_get(douyin_url, params)
  520. def get_user_favorite_list(self, user_id, min_time, count):
  521. """获取用户粉丝列表
  522. :param user_id: 用户ID
  523. :param min_time: 用于分页,第1页是0,后1页是上1页请求时返回的min_time
  524. :param count: 每次返回的条数
  525. :return:
  526. """
  527. params = {
  528. 'user_id': user_id,
  529. 'max_time': str(int(time.time()) if min_time == 0 else min_time),
  530. 'count': str(count)
  531. }
  532. douyin_url = 'https://aweme.snssdk.com/aweme/v1/user/follower/list/'
  533. return self.__http_get(douyin_url, params)
  534. def get_hot_search_list(self):
  535. """获取抖音热搜榜
  536. :return:
  537. """
  538. douyin_url = 'https://api.amemv.com/aweme/v1/hot/search/list/?detail_list=1'
  539. return self.__http_get(douyin_url)
  540. def get_hot_video_list(self):
  541. """获取抖音视频榜
  542. :return:
  543. """
  544. douyin_url = 'https://aweme.snssdk.com/aweme/v1/hotsearch/aweme/billboard/'
  545. return self.__http_get(douyin_url)
  546. def get_hot_music_list(self):
  547. """获取抖音音乐榜
  548. :return:
  549. """
  550. douyin_url = 'https://aweme.snssdk.com/aweme/v1/hotsearch/music/billboard/'
  551. return self.__http_get(douyin_url)
  552. def get_hot_positive_energy_list(self):
  553. """获取抖音正能量榜
  554. :return:
  555. """
  556. douyin_url = 'https://aweme.snssdk.com/aweme/v1/hotsearch/positive_energy/billboard/'
  557. return self.__http_get(douyin_url)
  558. def get_hot_category_list(self, cursor, count):
  559. """获取热门分类列表
  560. :param cursor: 分页用,第1页是0,下一页是上1页请求返回的cursor
  561. :param count: 每次返回的条数
  562. :return:
  563. """
  564. params = {
  565. 'cursor': str(cursor),
  566. 'count': str(count)
  567. }
  568. douyin_url = 'https://aweme.snssdk.com/aweme/v1/category/list/'
  569. return self.__http_get(douyin_url, params)
  570. def general_search(self, keyword, offset, count):
  571. """综合搜索
  572. :param keyword: 关键词
  573. :param offset: 分页,第1页是0,下1页是上1页请求返回的cursor
  574. :param count: 每次返回的条数
  575. :return:
  576. """
  577. params = {
  578. 'keyword': keyword,
  579. 'offset': str(offset),
  580. 'count': str(count),
  581. 'is_pull_refresh': '0',
  582. 'hot_search': '0',
  583. 'latitude': '0.0',
  584. 'longitude': '0.0'
  585. }
  586. douyin_url = 'https://aweme-hl.snssdk.com/aweme/v1/general/search/single/?'
  587. return self.__http_post(douyin_url, params)
  588. def item_search(self, keyword, offset, count):
  589. """综合搜索
  590. :param keyword: 关键词
  591. :param offset: 分页,第1页是0,下1页是上1页请求返回的cursor
  592. :param count: 每次返回的条数
  593. :return:
  594. """
  595. params = {
  596. 'query': keyword,
  597. 'cursor': str(offset),
  598. 'count': str(count),
  599. 'request_type': '1',
  600. 'search_filter': '1',
  601. 'search_source': 'commodity_search',
  602. 'enter_from': 'homepage_hot'
  603. }
  604. douyin_url = 'https://api5-normal-c-hl.amemv.com/aweme/v2/shop/search/aggregate/shopping/?'
  605. return self.__http_post(douyin_url, params)
  606. def video_search(self, keyword, offset, count):
  607. """ 视频搜索
  608. :param keyword: 关键词
  609. :param offset: 分页,第1页是0,下1页是上1页请求返回的cursor
  610. :param count: 每次返回的条数
  611. :return:
  612. """
  613. params = {
  614. 'keyword': keyword,
  615. 'offset': str(offset),
  616. 'count': str(count),
  617. 'is_pull_refresh': '0',
  618. 'hot_search': '0',
  619. 'source': 'video_search'
  620. }
  621. douyin_url = 'https://aweme-hl.snssdk.com/aweme/v1/search/item/?'
  622. return self.__http_post(douyin_url, params)
  623. def user_search(self, keyword, offset, count):
  624. """ 用户搜索
  625. :param keyword: 关键词
  626. :param offset: 分页,第1页是0,下1页是上1页请求返回的cursor
  627. :param count: 每次返回的条数
  628. :return:
  629. """
  630. params = {
  631. 'keyword': keyword,
  632. 'cursor': str(offset),
  633. 'count': str(count),
  634. 'type': '1',
  635. 'is_pull_refresh': '0',
  636. 'hot_search': '0',
  637. 'source': ''
  638. }
  639. douyin_url = 'https://aweme-hl.snssdk.com/aweme/v1/discover/search/?'
  640. return self.__http_post(douyin_url, params)
  641. def get_video_comment_list(self, aweme_id, cursor, count):
  642. """获取视频评论列表
  643. :param awemeId: 视频ID
  644. :param cursor: 分页, 第1页是0, 下1页是上1页请求返回的cursor
  645. :param count: 每次返回的条数
  646. :return:
  647. """
  648. params = {
  649. 'aweme_id': aweme_id,
  650. 'cursor': str(cursor),
  651. 'count': str(count)
  652. }
  653. douyin_url = 'https://aweme.snssdk.com/aweme/v2/comment/list/'
  654. return self.__http_get(douyin_url, params)
  655. def get_video_detail(self, aweme_id):
  656. """获取视频详情
  657. :param aweme_id: 视频ID
  658. :return:
  659. """
  660. params = {
  661. 'aweme_id': aweme_id
  662. }
  663. douyin_url = 'https://aweme.snssdk.com/aweme/v1/aweme/detail/'
  664. return self.__http_get(douyin_url, params)
  665. def get_music_detail(self, music_id):
  666. """获取音乐详情
  667. :param music_id: 音乐id
  668. :return:
  669. """
  670. params = {
  671. 'music_id': str(music_id),
  672. 'click_reason': '0'
  673. }
  674. douyin_url = 'https://aweme.snssdk.com/aweme/v1/music/detail/'
  675. return self.__http_get(douyin_url, params)
  676. def get_music_videos(self, music_id, cursor, count):
  677. """获取音乐对应的视频列表
  678. :param music_id: 音乐id
  679. :param cursor: 分页,首页是0,下一页是上一页请求返回的cursor
  680. :param count: 每次返回的条数
  681. :return:
  682. """
  683. params = {
  684. 'music_id': str(music_id),
  685. 'cursor': str(cursor),
  686. 'count': str(count),
  687. 'type': '6'
  688. }
  689. douyin_url = 'https://aweme.snssdk.com/aweme/v1/music/aweme/'
  690. return self.__http_get(douyin_url, params)
  691. def get_topic_videos(self, hashtag_name, cursor, count):
  692. """获取话题相关视频
  693. :param hashtag_name: 话题
  694. :param cursor: 分页,首页是0,下一页是上一页请求返回的cursor
  695. :param count: 每次返回的条数
  696. :return:
  697. """
  698. params = {
  699. 'cursor': str(cursor),
  700. 'count': str(count),
  701. 'source': 'challenge_video',
  702. 'hashtag_name': hashtag_name,
  703. 'type': '5',
  704. 'query_type': '1'
  705. }
  706. douyin_url = 'https://aweme-hl.snssdk.com/aweme/v1/challenge/aweme/'
  707. return self.__http_get(douyin_url, params)
  708. def get_promotion_list(self, user_id,sec_user_id, cursor, count):
  709. """获取商品橱窗列表
  710. :param user_id: user id
  711. :param cursor: 分页,首页是0,下一页是上一页请求返回的cursor
  712. :param count: 每次返回的条数
  713. :return:
  714. """
  715. params = {
  716. # 'sec_user_id':sec_user_id,
  717. 'count': str(count),
  718. 'cursor': str(cursor),
  719. 'user_id': str(user_id),
  720. 'retry_type': 'no_retry',
  721. 'ac': '4g',
  722. 'channel': channel,
  723. 'aid': '2329',
  724. 'app_name': 'aweme',
  725. 'version_code': f'{V1}{V2}0',
  726. 'version_name': f'{V1}.{V2}.0',
  727. 'device_platform': 'android',
  728. 'ssmix': 'a',
  729. 'device_type': device_type,
  730. 'device_brand': device_brand,
  731. 'language': 'zh',
  732. 'os_api': '25',
  733. 'os_version': '7.1.2',
  734. 'manifest_version_code': f'{V1}{V2}0',
  735. 'resolution': '720*1280',
  736. 'dpi': '192',
  737. 'update_version_code': f'{V1}{V2}02',
  738. 'mcc_mnc': '46000'
  739. }
  740. douyin_url = 'https://aweme.snssdk.com/aweme/v1/promotion/user/promotion/list/'
  741. return self.__http_get(douyin_url, params)
  742. def get_share_video_detail(self, share_url):
  743. """获取分享链接对应的视频信息
  744. :param share_url: 分享链接
  745. :return:
  746. """
  747. headers = {
  748. 'Accept': '*/*',
  749. 'Connection': 'keep-alive',
  750. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
  751. }
  752. resp = requests.get(share_url, headers=headers, timeout=20)
  753. url_path = parse.urlparse(resp.url).path
  754. video_id = url_path.split('/')[3]
  755. return self.get_video_detail(video_id)
  756. def get_webcast_room_info(self, room_id):
  757. """获取直播房间信息
  758. :param room_id:
  759. :return:
  760. """
  761. params = {
  762. 'pack_level': '4',
  763. 'room_id': str(room_id),
  764. 'webcast_sdk_version': '1710'
  765. }
  766. douyin_url = 'https://webcast.amemv.com/webcast/room/info/?'
  767. return self.__http_get(douyin_url, params)
  768. def get_webcast_user_info(self, room_id, user_id):
  769. """获取直播用户信息
  770. :param user_id:
  771. :return:
  772. """
  773. params = {
  774. 'request_from': 'admin',
  775. 'current_room_id': str(room_id),
  776. 'target_uid': str(user_id),
  777. 'anchor_id': str(user_id),
  778. 'packed_level': '2',
  779. 'webcast_sdk_version': '1150'
  780. }
  781. douyin_url = 'https://webcast.amemv.com/webcast/user/'
  782. return self.__http_get(douyin_url, params)
  783. def get_webcast_ranklist(self, room_id):
  784. """获取直播本场榜
  785. :param room_id:
  786. :param anchor_id:
  787. :return:
  788. """
  789. params = {
  790. 'room_id': str(room_id),
  791. 'rank_type': '17',
  792. 'webcast_sdk_version': '1150'
  793. }
  794. douyin_url = 'https://webcast.amemv.com/webcast/ranklist/room/' + str(room_id) + '/contributor/'
  795. return self.__http_get(douyin_url, params)
  796. def __get_random(self, len):
  797. return ''.join(str(random.choice(range(10))) for _ in range(len))
  798. def __get_msg(self, resp):
  799. return json.loads(resp.text)['msg']
  800. def __get_random_mac(self):
  801. mac = [0x10, 0x2a, 0xb3,
  802. random.randint(0x00, 0x7f),
  803. random.randint(0x00, 0xff),
  804. random.randint(0x00, 0xff)]
  805. return ':'.join(map(lambda x: "%02x" % x, mac))
  806. def __add_other_params(self, douyin_url, params=None):
  807. if params is None:
  808. params = {}
  809. if not douyin_url.__contains__('?'):
  810. douyin_url = douyin_url + '?'
  811. common_params = parse.urlencode(self.__device_params)
  812. if douyin_url.endswith('?') or douyin_url.endswith('&'):
  813. douyin_url = douyin_url + common_params
  814. else:
  815. douyin_url = douyin_url + '&' + common_params
  816. if len(params) > 0:
  817. douyin_url = douyin_url + '&' + parse.urlencode(params)
  818. douyin_url = douyin_url + "&_rticket=" + str(int(round(time.time() * 1000))) + "&ts=" + str(int(time.time()))
  819. return douyin_url
  820. def __get_cookie(self):
  821. if len(self.__cookie) == 0:
  822. return ''
  823. return "; ".join([str(x) + "=" + str(y) for x, y in self.__cookie.items()])
  def get_webcast_feed(self):
      """Fetch the webcast feed.

      No request-specific parameters are sent.

      :return: the result of the signed GET against the feed endpoint
      """
      return self.__http_get('https://webcast.amemv.com/webcast/feed/?', {})
  def __get_sign(self, url, form_params=None):
      """Build the signature headers for a request.

      :param url: full request URL; everything after its first '?' is
          passed to the signer
      :param form_params: optional form body; when truthy, the MD5 of its
          urlencoded form is sent upper-cased as ``X-SS-STUB``
      :return: dict with ``X-Khronos``, ``X-Gorgon`` and ``X-Pods``, plus
          ``X-SS-STUB`` for form requests
      :raises ValueError: if ``url`` contains no '?'
      """
      body_md5 = ''
      if form_params:
          encoded_form = parse.urlencode(form_params)
          body_md5 = hashlib.md5(encoded_form.encode('utf-8')).hexdigest()
      timestamp = int(time.time())
      # Imported here, not at module level, exactly as before.
      from lib_scraper.Xg04 import X_Gorgon
      query_string = url[url.index('?') + 1:]
      # NOTE(review): `cookie` is not defined in this method, so it must
      # resolve to a module-level name or this raises NameError. An earlier
      # revision passed the urlencoded self.__cookie here. TODO confirm
      # which value the signer should receive.
      gorgon = X_Gorgon(query_string, "", cookie)['X-Gorgon']
      signature = {
          'X-Khronos': str(timestamp),
          'X-Gorgon': gorgon,
          'X-Pods': ''
      }
      if body_md5:
          signature['X-SS-STUB'] = body_md5.upper()
      return signature
  856. def __get_headers(self, sign=None):
  857. if sign is None:
  858. sign = {}
  859. headers = {
  860. 'User-Agent': DouYinApi.USER_AGENT,
  861. 'X-SS-REQ-TICKET': str(round(time.time() * 1000)),
  862. # 'response-format':'protobuf'
  863. }
  864. headers.update(sign)
  865. return headers
  def __http_get(self, url, query_params=None):
      """Send a signed GET request and return the decoded JSON body.

      The URL is extended with the common device parameters and
      ``query_params``, then signed. The request goes through
      ``self.proxies`` when that is truthy, with certificate verification
      off and a 20 second timeout. Cookies set by the response are merged
      into the stored cookies.

      :param url: endpoint URL
      :param query_params: optional request-specific query parameters
      :return: the response body parsed as JSON
      """
      full_url = self.__add_other_params(url, {} if query_params is None else query_params)
      sign = self.__get_sign(full_url)
      request_kwargs = {
          'headers': self.__get_headers(sign),
          'cookies': self.__cookie,
          'verify': False,
          'timeout': 20,
      }
      if self.proxies:
          request_kwargs['proxies'] = self.proxies
      response = requests.get(full_url, **request_kwargs)
      received = response.cookies.get_dict()
      if received:
          self.__cookie.update(received)
      return response.json()
  def __http_post(self, url, form_params=None):
      """Send a signed form POST and return the raw response text.

      Only the common device parameters are appended to the URL.
      ``form_params`` travels in the body and also feeds the signature.
      The request goes through ``self.proxies`` when that is truthy, with
      certificate verification off and a 20 second timeout. Cookies set by
      the response are merged into the stored cookies.

      :param url: endpoint URL
      :param form_params: optional form fields
      :return: the response body as text
      """
      body = {} if form_params is None else form_params
      full_url = self.__add_other_params(url)
      sign = self.__get_sign(full_url, body)
      request_kwargs = {
          'headers': self.__get_headers(sign),
          'data': body,
          'cookies': self.__cookie,
          'verify': False,
          'timeout': 20,
      }
      if self.proxies:
          request_kwargs['proxies'] = self.proxies
      response = requests.post(full_url, **request_kwargs)
      received = response.cookies.get_dict()
      if received:
          self.__cookie.update(received)
      return response.text
  898. def comm_get(self, douyin_url, params={}, need_comm_param=True):
  899. """获取用户信息
  900. :param user_id: 用户ID
  901. :return:
  902. """
  903. if need_comm_param:
  904. return self.__http_get(douyin_url, params)
  905. else:
  906. params = parse.urlencode(params)
  907. url = douyin_url + params
  908. sign = self.__get_sign(douyin_url)
  909. if self.proxies:
  910. resp = requests.get(url, headers=self.__get_headers(sign), cookies=self.__cookie, proxies=self.proxies,
  911. verify=False, timeout=20)
  912. else:
  913. resp = requests.get(url, headers=self.__get_headers(sign), cookies=self.__cookie, verify=False, timeout=20)
  914. cookie = resp.cookies.get_dict()
  915. if len(cookie) != 0:
  916. self.__cookie.update(cookie)
  917. # print(resp.text)
  918. return resp.text
  def get_room_product_list(self, room_id, anchor_id, sec_author_id):
      """Fetch the products promoted in a live room.

      :param room_id: id of the live room (sent as a string)
      :param anchor_id: id of the anchor (sent as a string)
      :param sec_author_id: sent unchanged as ``sec_author_id``
      :return: the result of the signed GET against the promotions endpoint
      """
      endpoint = 'https://lianmengapi.snssdk.com/live/promotions/?'
      query = dict(
          room_id=str(room_id),
          anchor_id=str(anchor_id),
          sec_author_id=sec_author_id,
      )
      return self.__http_get(endpoint, query)
  def get_product_info(self, promotion_id, product_id, author_id):
      """Fetch information about a product.

      The three ids are sent as form fields in a signed POST.

      :param promotion_id: promotion id of the product
      :param product_id: product id
      :param author_id: author id
      :return: the result of the signed POST against the shop promotion
          endpoint
      """
      endpoint = 'https://api3-normal-c-lf.amemv.com/aweme/v2/shop/promotion/'
      form = dict(
          promotion_id=promotion_id,
          product_id=product_id,
          author_id=author_id,
      )
      return self.__http_post(endpoint, form)
  def get_room_product_campaign(self, promotion_id):
      """Fetch campaign data for a live-room promotion.

      :param promotion_id: promotion id, sent as a string under
          ``promotion_ids``
      :return: the result of the signed GET against the campaign endpoint
      """
      endpoint = 'https://lianmengapi.snssdk.com/live/promotion/campaign/?'
      return self.__http_get(endpoint, {'promotion_ids': str(promotion_id)})
  def getDevice(self):
      """Register a device and prepare this client to use it.

      If registration reports a device id of ``'0'``, a hard-coded fallback
      pair is returned. Otherwise the new ids are installed on this client
      and the ``app_alert`` endpoint is called once.

      :return: tuple ``(device_id, iid)``
      """
      registration = self.register_device()
      # All five keys are read even though the cookie is not used below.
      device_id, iid, udid, openudid, _cookie = (
          registration['device_id'],
          registration['iid'],
          registration['uuid'],
          registration['openudid'],
          registration['cookie'],
      )
      if device_id == '0':
          return '2532904814256759', '3166223513429303'
      self.init_device_ids(device_id, iid, udid, openudid)
      alert_query = {"has_market": "0", "is_activated": '0'}
      self.comm_get('https://aweme.snssdk.com/service/2/app_alert/?', alert_query)
      return device_id, iid
  if __name__ == '__main__':
      # Script entry point: for each of 30 proxies, try up to 10 times to
      # register a device and push the resulting identity record to Redis.
      # Variable names are kept because they are module globals in script mode.
      import warnings
      warnings.filterwarnings("ignore")
      for i in range(30):
          proxy = Proxy.get_yuanrenyun_list()
          print(proxy)
          # The same HTTP proxy URL is used for both schemes.
          proxies = dict.fromkeys(("http", "https"), "http://" + proxy)
          douApi = DouYinApi('', proxies)
          # NOTE(review): this inner loop reuses the name `i`, hiding the
          # outer counter for the rest of the iteration.
          for i in range(10):
              try:
                  result = douApi.register_device()
                  print(result)
                  device_id, iid, udid, openudid, cookie, cdid = (
                      result['device_id'], result['iid'], result['uuid'],
                      result['openudid'], result['cookie'], result['clientudid'],
                  )
                  if device_id == '0':
                      continue
                  douApi.init_device_ids(device_id, iid, udid, openudid)
                  params = {"has_market": "0", "is_activated": '0'}
                  douApi.comm_get('https://aweme.snssdk.com/service/2/app_alert/?', params)
                  server_time = str(int(time.time()))
                  # V1, V2, device_type and device_brand are not defined in this
                  # block; presumably module-level names (verify).
                  data = '@@@'.join((
                      device_id, iid, openudid, udid, cookie, server_time,
                      cdid, proxy, V1, V2, device_type, device_brand,
                  ))
                  print(data)
                  key = 'DOUYIN_SCRAPE_DID_IID_TTREQ_0414'
                  redis = DbRedis.kwai_connect().lpush(key, data)
                  key = 'DOUYIN_SCRAPE_DID_IID_TTREQ_1221'
                  # NOTE(review): `i` is the inner counter (0-9) here, so this
                  # branch never runs. Confirm whether the outer counter was meant.
                  if i > 10:
                      key = 'DOUYIN_SCRAPE_DID_IID_TTREQ_1221C'
                  redis = DbRedis.kwai_connect().lpush(key, data)
              except Exception as e:
                  # Best effort: report the failure and try the next attempt.
                  print(e)