# -*- coding: utf-8 -*-

# Define your item pipelines here.
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting.
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

import MySQLdb
import MySQLdb.cursors
from twisted.enterprise import adbapi

class XiaowuPipeline(object):
    """Default pass-through pipeline generated by the Scrapy scaffold."""

    def process_item(self, item, spider):
        return item

class BaiduImagePipeline(object):
    """Stores scraped items in MySQL asynchronously via a Twisted connection pool."""

    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        # Build the MySQL connection arguments from the project settings.
        dbargs = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWD'],
            charset='utf8',
            cursorclass=MySQLdb.cursors.DictCursor,
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
        return cls(dbpool)
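
    # The settings above come from the project's settings.py. A minimal
    # sketch of the entries this pipeline expects; the values and the
    # module path 'xiaowu.pipelines' are placeholders, not taken from
    # the original project:
    #
    #   ITEM_PIPELINES = {'xiaowu.pipelines.BaiduImagePipeline': 300}
    #   MYSQL_HOST = 'localhost'
    #   MYSQL_DBNAME = 'testdb'
    #   MYSQL_USER = 'root'
    #   MYSQL_PASSWD = 'secret'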

    def process_item(self, item, spider):
        # Run the insert in a pool thread; returns a Deferred.
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        # Attach the error handler so database failures are reported.
        query.addErrback(self._handle_error, item, spider)
        return item

    def _conditional_insert(self, tx, item):
        # print(item['name'])  # debug
        sql = "insert into test(name, title, content) values (%s, %s, %s)"
        params = (item["name"], item["title"], item["content"])
        tx.execute(sql, params)
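
    # A hypothetical schema for the 'test' table used above; the real
    # column types are not shown in the source, so these are assumptions:
    #
    #   CREATE TABLE test (
    #       name    VARCHAR(255),
    #       title   VARCHAR(255),
    #       content TEXT
    #   );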

    def _handle_error(self, failure, item, spider):
        # Report database errors from the Deferred chain.
        spider.logger.error(failure)