Return to Snippet

Revision: 68079
at November 25, 2014 13:30 by zwidnypublic


Initial Code
// File: myscrapy/items.py
..

# -*- coding: utf-8 -*-

import scrapy    
    
    
class FundItem(scrapy.Item):                                                                                                        
    """Scrapy item describing one fund's holding of a stock.

    Each attribute below is declared as a ``scrapy.Field``; the trailing
    comment on each line says what the field holds.
    """

    stock = scrapy.Field()  # stock name
    deadline = scrapy.Field()  # reporting cut-off date
    fund = scrapy.Field()  # fund name
    code = scrapy.Field()  # fund code
    count = scrapy.Field()  # number of shares held
    percentcir = scrapy.Field()  # share of circulating stock (%)
    svalue = scrapy.Field()  # market value of the holding (yuan)
    percentnet = scrapy.Field()  # share of net asset value (%)

// File: myscrapy/pipelines.py
..
# -*- coding: utf-8 -*- 

import logging

from scrapy.utils.project import get_project_settings
from twisted.enterprise import adbapi
    
# Load the project settings once at import time; the pipeline below reads its
# DB_SERVER / DB_CONNECT values from this object.
settings = get_project_settings() 


class FundPipeline(object):
    """Item pipeline that inserts every scraped item into a database table.

    The connection pool is built from the ``DB_SERVER`` (DB-API module name)
    and ``DB_CONNECT`` (connection keyword arguments) project settings.
    Column names are taken from the item's declared fields, so the target
    table must have one column per field of the item class.
    """

    # INSERT template; the first %s receives the comma-separated column list,
    # the second the matching list of DB-API placeholders. The table that the
    # item class maps to is named ``fund``.
    insert_sql = """insert into fund (%s) values ( %s )"""

    def __init__(self):
        """Create the asynchronous connection pool from the project settings."""
        dbargs = settings.get('DB_CONNECT')
        db_server = settings.get('DB_SERVER')
        self.dbpool = adbapi.ConnectionPool(db_server, **dbargs)

    def close_spider(self, spider):
        """Close the connection pool when Scrapy closes the spider."""
        self._close_pool()

    def __del__(self):
        # Kept as a last-resort fallback; close_spider is the reliable hook.
        self._close_pool()

    def _close_pool(self):
        """Close the pool at most once; safe even if __init__ never finished."""
        pool = getattr(self, 'dbpool', None)
        if pool is not None:
            # Clear the attribute first so a second call cannot close twice.
            self.dbpool = None
            pool.close()

    def process_item(self, item, spider):
        """Queue an insert for ``item`` and pass the item on unchanged.

        The insert runs asynchronously; a failure is logged instead of being
        silently dropped with the discarded Deferred.

        Returns:
            The same ``item`` that was passed in.
        """
        deferred = self.insert_data(item, self.insert_sql)
        deferred.addErrback(self._log_insert_failure, item)
        return item

    def _log_insert_failure(self, failure, item):
        """Errback that reports a failed insert through the logging module."""
        logging.getLogger(__name__).error(
            "Failed to insert item %r: %s", item, failure.getErrorMessage())

    def insert_data(self, item, insert):
        """Build the INSERT statement for ``item`` and run it on the pool.

        Args:
            item: object exposing ``fields`` (declared field names) and a
                mapping-style ``get``.
            insert: SQL template with two ``%s`` slots (columns, placeholders).

        Returns:
            Whatever ``dbpool.runOperation`` returns for the queued statement.
        """
        keys = list(item.fields)
        fields = u','.join(keys)
        qm = u','.join([u'%s'] * len(keys))
        sql = insert % (fields, qm)
        # Declared fields that were never populated are sent as None rather
        # than raising KeyError, so a partially filled item is still stored.
        data = [item.get(k) for k in keys]
        return self.dbpool.runOperation(sql, data)


// File: myscrapy/settings.py
..
# -*- coding: utf-8 -*-
...

# Enable the database pipeline; 500 is the value Scrapy associates with it.
ITEM_PIPELINES = {'myscrapy.pipelines.FundPipeline': 500}

# Name of the DB-API module handed to twisted's adbapi.ConnectionPool
# (see the twisted documentation for details).
DB_SERVER = 'MySQLdb'

# Keyword arguments forwarded to the connection pool.
# NOTE(review): replace the sample database, credentials and host with your own.
DB_CONNECT = dict(
    db='finance',            # your database
    user='root',
    passwd='root',
    host='10.19.80.197',     # your server
    charset='utf8',
    use_unicode=True,
)
...

Initial URL

                                

Initial Description
Stores your scraped items in a MySQL database.

1. This assumes you already have a MySQL database with a table whose columns match the fields of the item class you want to store.
 
2. This only ensures that your items are inserted into MySQL; performance is not guaranteed.

3. Todo

    Abstract pipeline class

Initial Title
Scraped item to mysql in scrapy

Initial Tags
mysql

Initial Language
Python