123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- import requests,json,random,execjs,uuid
- from urllib import parse
- from urllib.parse import quote
- # from libs.proxy import Proxy
- # -- coding: utf-8 --**
class WebDouYin:
    """Small client for two douyin.com web endpoints that need a ``_signature``.

    The signature is produced by calling a function inside a local JavaScript
    file through ``execjs``.  Every HTTP call goes through :meth:`response`,
    which returns the decoded JSON body on success and a failure *string*
    (prefixed with ``"请求失败"``) on any error.
    """

    # Default location of the JavaScript file evaluated by get_signature().
    SIGNATURE_JS_PATH = '/mnt/shop_live_scraper/signature.js'

    BASE_URL = 'https://www.douyin.com'
    USER_VIDEOS_URL = 'https://www.douyin.com/aweme/v1/web/aweme/post/?'
    USER_INFO_URL = 'https://www.douyin.com/aweme/v1/web/user/profile/other/?'
    # Profile page requested by get_ck() only to collect the cookies it sets.
    COOKIE_SEED_URL = (
        'https://www.douyin.com/user/'
        'MS4wLjABAAAAKpCGhwidAtgmUXmYIT0zjp2QpGquUaOCEeVPE6_gHjQ'
    )
    # Timeout, in seconds, applied to every HTTP request made by this class.
    REQUEST_TIMEOUT = 8

    # User-agent strings get_ua_ck() picks from.
    UA_LIST = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3870.400 QQBrowser/10.8.4405.400",
    )
    # Pre-captured cookie strings get_ua_ck() picks from.
    # NOTE(review): these are hard-coded captures and presumably expire --
    # confirm they are still accepted, or pass a fresh cookie from get_ck().
    CK_LIST = (
        'ttwid=1%7CTVzdM0P0u-8dtsmh6c-EaQEtBoTSOs_MG85FAg07AbA%7C1631502013%7C66442d8594de8e93ad18b73f3dfe0c94ed864c3d932824bcde9918b5be172321; passport_csrf_token=866923f1a32045fd82e47053158402a2',
        'ttwid=1%7CGPDDu9-w3RGs2Pcd0wRlvLYoktpDt-v8LP5ZMyb1NBM%7C1630319594%7Cffb8de47e6da87dcfd76349b5ad34aa1f9b9d4332261a3a8436b932a893366c1; passport_csrf_token=79284b8777a7a54f3066cefef9af539e',
        'ttwid=1%7CGsfqc7NpdOg4N-U-VX7Q77KsWjVTZ7gxLNifsisj8YE%7C1631618570%7Cafbb13a27fd1c2d7a8245454b1e0d7cd654d80848a320933a25d9ef77638c18c; passport_csrf_token=84911c9af94040a99cc10416bd27533d',
        'ttwid=1%7C82FGr05YUOReYUB301ao_erqOQ3ilbXZdEy0tkMsdXY%7C1631863641%7C1dcebe643a96f00841a3b490db60de886bfe07ff3d276e509717abc4e1681ba6; passport_csrf_token=494ae3fffe00328101fd40e050ce49db',
        'ttwid=1%7CwfnX3T9LY4_60iGoQNzyqYe5ahILFeRxfMuZ1pdgXf8%7C1632724192%7Cb613fddc0b533d5578dad4d5f9290705fdc6432aa854d492f4761d164dd3fdd5; passport_csrf_token=4a8afba333103b033e537003b72ee91b',
    )

    def __init__(self, signature_js_path=None):
        """Create a client.

        Args:
            signature_js_path: Optional path of the signature JavaScript file;
                defaults to ``SIGNATURE_JS_PATH``.
        """
        self.proxies = {}
        self.signature_js_path = signature_js_path or self.SIGNATURE_JS_PATH
        # Compiled execjs context; created lazily by get_signature().
        self._signature_ctx = None

    def get_signature(self, url=None, method='_signature'):
        """Return the result of calling ``method`` in the signature script on ``url``.

        The script is read and compiled on the first call only and the compiled
        context is reused afterwards, so edits to the file on disk are not
        picked up by an existing instance.

        Args:
            url: URL to sign.  Newline characters are stripped before the call.
            method: Name of the JavaScript function to call.

        Returns:
            Whatever the JavaScript function returns.

        Raises:
            ValueError: If ``url`` is None.
            OSError: If the signature script cannot be read.
        """
        if url is None:
            # Fail before any file or JS work instead of crashing on None.replace().
            raise ValueError('url is required to compute a signature')
        if self._signature_ctx is None:
            with open(self.signature_js_path, 'r', encoding='utf-8') as script:
                self._signature_ctx = execjs.compile(script.read())
        return self._signature_ctx.call(method, url.replace('\n', ''))

    @staticmethod
    def _format_cookies(cookie_dict):
        """Render ``cookie_dict`` as a Cookie header value plus a random CSRF token.

        Returns None when ``cookie_dict`` is empty.
        """
        if not cookie_dict:
            return None
        pairs = ['%s=%s' % (name, value) for name, value in cookie_dict.items()]
        pairs.append('passport_csrf_token=' + uuid.uuid4().hex)
        return '; '.join(pairs)

    def get_ck(self, proxy=None):
        """Fetch a profile page and return the cookies it set as a header string.

        Args:
            proxy: Optional ``proxies`` mapping handed to ``requests.get``.

        Returns:
            ``"name=value; ...; passport_csrf_token=<32 hex chars>"``, or None
            when the response carried no cookies.
        """
        # Requests below are made with verify=False; silence the resulting warnings.
        requests.packages.urllib3.disable_warnings()
        headers = {
            'authority': 'www.douyin.com',
            'method': 'GET',
            'path': '/',
            'scheme': 'https',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
        }
        res = requests.get(
            self.COOKIE_SEED_URL,
            headers=headers,
            verify=False,
            proxies=proxy,
            timeout=self.REQUEST_TIMEOUT,
        )
        return self._format_cookies(res.cookies.get_dict())

    def get_ua_ck(self, type_name=None):
        """Return a random ``(user_agent, cookie)`` pair from the built-in lists.

        Args:
            type_name: Accepted for backward compatibility; not used.
        """
        return random.choice(self.UA_LIST), random.choice(self.CK_LIST)

    def response(self, url, headers, proxy, data=None):
        """Send the request and return its JSON body.

        A POST is sent when ``data`` is truthy, otherwise a GET.

        Returns:
            The decoded JSON document on success.  On any failure (network
            error, non-JSON body, ...) the error is printed and a string
            starting with ``"请求失败"`` is returned instead of raising, so
            callers must check the type of the result.
        """
        try:
            requests.packages.urllib3.disable_warnings()
            if data:
                res = requests.post(url, headers=headers, verify=False, proxies=proxy,
                                    data=data, timeout=self.REQUEST_TIMEOUT)
            else:
                res = requests.get(url, headers=headers, verify=False, proxies=proxy,
                                   timeout=self.REQUEST_TIMEOUT)
            json_data = json.loads(res.content.decode())
        except Exception as e:
            # Best-effort boundary: callers receive the failure text, not an exception.
            json_data = "请求失败" + str(e)
            print(json_data)
        return json_data

    @staticmethod
    def _browser_params(ua):
        """Return the query parameters shared by both endpoints, in URL order."""
        return {
            'version_code': '160100',
            'version_name': '16.1.0',
            'cookie_enabled': 'true',
            'screen_width': '1920',
            'screen_height': '1080',
            'browser_language': 'zh-CN',
            'browser_platform': 'Win32',
            'browser_name': 'Mozilla',
            'browser_version': ua.replace('Mozilla/', ''),
            'browser_online': 'true',
        }

    @classmethod
    def _build_headers(cls, url, sec_user_id, ua, ck):
        """Return the request headers for a signed API call to ``url``."""
        return {
            # The first four entries are passed to requests as ordinary headers.
            # NOTE(review): they look like a browser's HTTP/2 pseudo-headers --
            # confirm the server actually needs them.
            "authority": "www.douyin.com",
            "method": "GET",
            "path": str(url).replace(cls.BASE_URL, ''),
            "scheme": "https",
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "cookie": ck,
            "referer": "https://www.douyin.com/user/{sec_user_id}?enter_method=search_result&enter_from=search_result".format(sec_user_id=sec_user_id),
            "user-agent": ua,
            "withcredentials": "true",
        }

    def _signed_get(self, base_url, params, sec_user_id, ua, ck, proxy):
        """Sign ``base_url`` + ``params``, send the GET and return response()'s result."""
        url = base_url + parse.urlencode(params)
        # The signature is computed over the URL without the _signature parameter.
        url += '&_signature=' + quote(self.get_signature(url))
        headers = self._build_headers(url, sec_user_id, ua, ck)
        return self.response(url=url, headers=headers, proxy=proxy)

    def get_user_videos(self, sec_user_id, max_cursor=0, count=10, proxy=None, cookie=None):
        """Request one page of the posts endpoint for ``sec_user_id``.

        Args:
            sec_user_id: Value sent as the ``sec_user_id`` query parameter.
            max_cursor: Value sent as the ``max_cursor`` query parameter.
            count: Value sent as the ``count`` query parameter.
            proxy: Optional ``proxies`` mapping for ``requests``.
            cookie: Optional Cookie header value; when falsy a random built-in
                cookie is used.

        Returns:
            The result of :meth:`response` (JSON data or a failure string).
        """
        ua, ck = self.get_ua_ck('get_user_videos')
        if cookie:
            ck = cookie
        # Parameter order is part of the signed URL; keep it stable.
        params = {
            'device_platform': 'webapp',
            'aid': '6383',
            'channel': 'channel_pc_web',
            'sec_user_id': sec_user_id,
            'max_cursor': str(max_cursor),
            'count': str(count),
            'publish_video_strategy_type': '2',
        }
        params.update(self._browser_params(ua))
        return self._signed_get(self.USER_VIDEOS_URL, params, sec_user_id, ua, ck, proxy)

    def get_user_info(self, sec_user_id, proxy=None, cookie=None):
        """Request the profile endpoint for ``sec_user_id``.

        Args:
            sec_user_id: Value sent as the ``sec_user_id`` query parameter.
            proxy: Optional ``proxies`` mapping for ``requests``.
            cookie: Optional Cookie header value; when falsy a random built-in
                cookie is used.

        Returns:
            The result of :meth:`response` (JSON data or a failure string).
        """
        # Pick a random user-agent / cookie pair.
        ua, ck = self.get_ua_ck()
        if cookie:
            ck = cookie
        # Parameter order is part of the signed URL; keep it stable.
        params = {
            'device_platform': 'webapp',
            'aid': '6383',
            'channel': 'channel_pc_web',
            'publish_video_strategy_type': '2',
            'source': 'channel_pc_web',
            'sec_user_id': sec_user_id,
        }
        params.update(self._browser_params(ua))
        return self._signed_get(self.USER_INFO_URL, params, sec_user_id, ua, ck, proxy)
if __name__ == '__main__':
    # Manual smoke run: request one profile and print whatever response() returned.
    client = WebDouYin()
    profile = client.get_user_info('MS4wLjABAAAA2sPiyVAbQc3FsbJJeuyvZkuLjmPRpfKgCvuf41TdjII')
    print(profile)
    # Other manual checks that were previously tried here, kept for reference:
    #   client.get_user_videos('MS4wLjABAAAAqLPgx-hHf27EqGEtRQ6YyuQQTmikB5CBO1jXy61yhWKujGd8KO5G8V2vdcLQJAym')
    #   client.get_ck()
    # A proxy mapping for those calls has the form
    #   {"http": "http://" + proxy, "https": "http://" + proxy}
    # where ``proxy`` came from Proxy.get() (libs.proxy, import disabled above).
|