|
@@ -26,16 +26,17 @@ class quyaqu(scrapy.Spider):
|
26
|
26
|
for quote in response.xpath("//li/a"):
|
27
|
27
|
linkurl=(quote.xpath("./@href").extract_first())
|
28
|
28
|
imgsrc=(quote.xpath("./img/@src").extract_first())
|
29
|
|
- spantitle=(quote.xpath("./span/text()").extract_first())
|
30
|
|
- if spantitle is None:
|
31
|
|
- spantitle=(quote.xpath("./text()").extract_first())
|
32
|
|
- if linkurl is not None:
|
33
|
|
- yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
|
34
|
|
- # fhtml.write(linkurl+ "\n")
|
35
|
|
- # fhtml.close()
|
36
|
|
- # next_page_url = response.xpath('//span[@class="nextPage"]/a/@href').extract_first()
|
37
|
|
- # if next_page_url is not None:
|
38
|
|
- # yield scrapy.Request(response.urljoin(next_page_url),callback=self.parse)
|
|
29
|
+ if imgsrc is not None:
|
|
30
|
+ spantitle=(quote.xpath("./span/text()").extract_first())
|
|
31
|
+ if spantitle is None:
|
|
32
|
+ spantitle=(quote.xpath("./text()").extract_first())
|
|
33
|
+ if linkurl is not None:
|
|
34
|
+ yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
|
|
35
|
+ # fhtml.write(linkurl+ "\n")
|
|
36
|
+ # fhtml.close()
|
|
37
|
+ # next_page_url = response.xpath('//span[@class="nextPage"]/a/@href').extract_first()
|
|
38
|
+ # if next_page_url is not None:
|
|
39
|
+ # yield scrapy.Request(response.urljoin(next_page_url),callback=self.parse)
|
39
|
40
|
def doparse(self,response):
|
40
|
41
|
# filename = response.url.split("/")[-2]+str(self.index)
|
41
|
42
|
# self.index=int(self.index)+1
|