店播爬取Python脚本

live_commodity_detail_v1.py 3.8KB

  1. import requests
  2. import time
  3. import random
  4. from libs.proxy import Proxy
  5. from log.print_log import PrintLog
  6. class LiveCommodityDetailV1:
  7. @staticmethod
  8. def get_data(product_id):
  9. url = 'http://ec.snssdk.com/product/lubanajaxstaticitem?id=' + product_id + '&page_id=&scope_type=5&item_id=&b_type_new=0'
  10. userAgentList = [
  11. 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0',
  12. 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3908.2 Mobile Safari/537.36',
  13. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116',
  14. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
  15. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0',
  16. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
  17. 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
  18. 'Mozilla/5.0 (Windows; U; Windows NT 6.1; ) AppleWebKit/534.12 (KHTML, like Gecko) Maxthon/3.0 Safari/534.12',
  19. 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)',
  20. 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201',
  21. 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)'
  22. ]
  23. headers = {
  24. 'Host': 'ec.snssdk.com',
  25. 'Connection': 'keep-alive',
  26. 'Cache-Control': 'max-age=0',
  27. 'User-Agent': random.choice(userAgentList),
  28. 'Referer': 'https://haohuo.jinritemai.com/',
  29. 'Accept': 'application/json, text/plain, */*',
  30. }
  31. while True:
  32. proxy = Proxy.get()
  33. proxies = {
  34. "http": "http://" + proxy,
  35. "https": "http://" + proxy
  36. }
  37. try:
  38. response = requests.get(
  39. url,
  40. headers=headers,
  41. proxies=proxies,
  42. timeout=10
  43. )
  44. if response.text is not None:
  45. break
  46. else:
  47. print(response)
  48. print('爬取http连接失败!')
  49. time.sleep(1)
  50. except requests.exceptions.ProxyError as e:
  51. PrintLog.print(
  52. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + '代理过期!' + str(e) + '\n'
  53. + product_id + '\n'
  54. + Proxy.proxy_info
  55. )
  56. Proxy.del_proxy(proxy)
  57. pass
  58. except requests.exceptions.ConnectTimeout as e:
  59. PrintLog.print(
  60. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' ConnectTimeout!' + str(e) + '\n'
  61. + product_id + '\n'
  62. + Proxy.proxy_info
  63. )
  64. Proxy.del_proxy(proxy)
  65. pass
  66. except Exception as e:
  67. PrintLog.print(
  68. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' 请求抛出异常!' + str(e) + '\n'
  69. + product_id + '\n'
  70. + Proxy.proxy_info
  71. )
  72. pass
  73. return response.text