13283339616 6 anni fa
parent
commit
d6b56912c4
2 file modificati con 17 aggiunte e 15 eliminazioni
  1. 6 5
      xiaowu/spiders/food.py
  2. 11 10
      xiaowu/spiders/foodEveryday.py

+ 6 - 5
xiaowu/spiders/food.py

@@ -30,11 +30,12 @@ class FoodSpider(scrapy.Spider):
30 30
             linkurl=(quote.xpath("./@href").extract_first())
31 31
             imgsrc=(quote.xpath("./img/@src").extract_first())
32 32
             spantitle=(quote.xpath("./span/text()").extract_first())
33
-            if spantitle is None:
34
-                spantitle=(quote.xpath("./text()").extract_first())
35
-            if linkurl is not None:
36
-                yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
37
-                # fhtml.write(linkurl+ "\n")
33
+            if imgsrc is not None:
34
+                if spantitle is None:
35
+                    spantitle=(quote.xpath("./text()").extract_first())
36
+                if linkurl is not None:
37
+                    yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
38
+                    # fhtml.write(linkurl+ "\n")
38 39
         # fhtml.close()
39 40
         next_page_url = response.xpath('//span[@class="nextPage"]/a/@href').extract_first()
40 41
         if next_page_url is not None:

+ 11 - 10
xiaowu/spiders/foodEveryday.py

@@ -26,16 +26,17 @@ class quyaqu(scrapy.Spider):
26 26
         for quote in response.xpath("//li/a"):
27 27
             linkurl=(quote.xpath("./@href").extract_first())
28 28
             imgsrc=(quote.xpath("./img/@src").extract_first())
29
-            spantitle=(quote.xpath("./span/text()").extract_first())
30
-            if spantitle is None:
31
-                spantitle=(quote.xpath("./text()").extract_first())
32
-            if linkurl is not None:
33
-                yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
34
-                # fhtml.write(linkurl+ "\n")
35
-                # fhtml.close()
36
-                # next_page_url = response.xpath('//span[@class="nextPage"]/a/@href').extract_first()
37
-                # if next_page_url is not None:
38
-                #     yield scrapy.Request(response.urljoin(next_page_url),callback=self.parse)
29
+            if imgsrc is not None:
30
+                spantitle=(quote.xpath("./span/text()").extract_first())
31
+                if spantitle is None:
32
+                    spantitle=(quote.xpath("./text()").extract_first())
33
+                if linkurl is not None:
34
+                    yield scrapy.Request(response.urljoin(linkurl),meta={'linkurl': linkurl,'imgsrc':imgsrc,'filename':filename,'spantitle':spantitle},callback=self.doparse)
35
+                    # fhtml.write(linkurl+ "\n")
36
+                    # fhtml.close()
37
+                    # next_page_url = response.xpath('//span[@class="nextPage"]/a/@href').extract_first()
38
+                    # if next_page_url is not None:
39
+                    #     yield scrapy.Request(response.urljoin(next_page_url),callback=self.parse)
39 40
     def doparse(self,response):
40 41
         # filename = response.url.split("/")[-2]+str(self.index)
41 42
         # self.index=int(self.index)+1