
pipelines.py

# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
from twisted.enterprise import adbapi

import MySQLdb
import MySQLdb.cursors


class XiaowuPipeline(object):
    """Default no-op pipeline: passes each item through unchanged."""

    def process_item(self, item, spider):
        return item


class BaiduImagePipeline(object):
    """Stores scraped items in MySQL asynchronously via Twisted's adbapi."""

    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        # Build the connection arguments from the project settings
        # (MYSQL_HOST, MYSQL_DBNAME, MYSQL_USER, MYSQL_PASSWD).
        dbargs = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWD'],
            charset='utf8',
            cursorclass=MySQLdb.cursors.DictCursor,
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
        return cls(dbpool)

    def process_item(self, item, spider):
        # Run the insert on adbapi's thread pool so the crawl is not
        # blocked; failures are routed to _handle_error.
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self._handle_error, item, spider)
        return item

    def _conditional_insert(self, tx, item):
        sql = "insert into test(name, title, content) values (%s, %s, %s)"
        params = (item["name"], item["title"], item["content"])
        tx.execute(sql, params)

    def _handle_error(self, failure, item, spider):
        # Log the Twisted Failure produced by the database interaction.
        spider.logger.error(failure)
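
This pipeline depends on configuration and a database table that live outside the file. A minimal settings.py sketch follows; the project module name xiaowu is inferred from the XiaowuPipeline class name, the pipeline priority is hypothetical, and the connection values are placeholders:

ITEM_PIPELINES = {
    'xiaowu.pipelines.BaiduImagePipeline': 300,  # priority is an assumption
}

MYSQL_HOST = 'localhost'     # placeholder value
MYSQL_DBNAME = 'xiaowu_db'   # placeholder value
MYSQL_USER = 'root'          # placeholder value
MYSQL_PASSWD = 'secret'      # placeholder value

_conditional_insert writes to a table named test with name, title, and content columns. A schema like the following would satisfy the query, though the column types and lengths here are assumptions:

CREATE TABLE test (
    name    VARCHAR(255),   -- type/length assumed, not from the original
    title   VARCHAR(255),   -- type/length assumed, not from the original
    content TEXT
) DEFAULT CHARSET=utf8;

For the insert to succeed, the spider's items must also expose matching name, title, and content fields.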