6 lat temu · d6b56912c4
--- a/xiaowu/spiders/food.py
+++ b/xiaowu/spiders/food.py
@@ -30,11 +30,12 @@ class FoodSpider(scrapy.Spider):
 
				             linkurl=(quote.xpath("./@href").extract_first())
			
 
				             imgsrc=(quote.xpath("./img/@src").extract_first())
			
 
				             spantitle=(quote.xpath("./span/text()").extract_first())
			
 
				-            if spantitle is None:
			
 
				-                spantitle=(quote.xpath("./text()").extract_first())
			
 
				-            if linkurl is not None:
			
 
				-                yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
			
 
				-                # fhtml.write(linkurl+ "\n")
			
 
				+            if imgsrc is not None:
			
 
				+                if spantitle is None:
			
 
				+                    spantitle=(quote.xpath("./text()").extract_first())
			
 
				+                if linkurl is not None:
			
 
				+                    yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
			
 
				+                    # fhtml.write(linkurl+ "\n")
			
 
				         # fhtml.close()
			
 
				         next_page_url = response.xpath('//span[@class="nextPage"]/a/@href').extract_first()
			
 
				         if next_page_url is not None:
			
--- a/xiaowu/spiders/foodEveryday.py
+++ b/xiaowu/spiders/foodEveryday.py
@@ -26,16 +26,17 @@ class quyaqu(scrapy.Spider):
 
				         for quote in response.xpath("//li/a"):
			
 
				             linkurl=(quote.xpath("./@href").extract_first())
			
 
				             imgsrc=(quote.xpath("./img/@src").extract_first())
			
 
				-            spantitle=(quote.xpath("./span/text()").extract_first())
			
 
				-            if spantitle is None:
			
 
				-                spantitle=(quote.xpath("./text()").extract_first())
			
 
				-            if linkurl is not None:
			
 
				-                yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
			
 
				-                # fhtml.write(linkurl+ "\n")
			
 
				-                # fhtml.close()
			
 
				-                # next_page_url = response.xpath('//span[@class="nextPage"]/a/@href').extract_first()
			
 
				-                # if next_page_url is not None:
			
 
				-                #     yield scrapy.Request(response.urljoin(next_page_url),callback=self.parse)
			
 
				+            if imgsrc is not None:
			
 
				+                spantitle=(quote.xpath("./span/text()").extract_first())
			
 
				+                if spantitle is None:
			
 
				+                    spantitle=(quote.xpath("./text()").extract_first())
			
 
				+                if linkurl is not None:
			
 
				+                    yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
			
 
				+                    # fhtml.write(linkurl+ "\n")
			
 
				+                    # fhtml.close()
			
 
				+                    # next_page_url = response.xpath('//span[@class="nextPage"]/a/@href').extract_first()
			
 
				+                    # if next_page_url is not None:
			
 
				+                    #     yield scrapy.Request(response.urljoin(next_page_url),callback=self.parse)
			
 
				     def doparse(self,response):
			
 
				         # filename = response.url.split("/")[-2]+str(self.index)
			
 
				         # self.index=int(self.index)+1