Saving scraped items to MySQL in Scrapy


/ Published in: Python
Save to your folder(s)

Store your scraped items in a MySQL database.

1. This assumes you already have a MySQL database with a table whose columns have the same names as the fields of the item class you want to store.

2. This only ensures that your items are inserted into MySQL; it makes no guarantees about performance.

3. Todo

Abstract pipeline class


Copy this code and paste it in your HTML
  1. // File: myscrapy/items.py
  2. ..
  3.  
  4. # -*- coding: utf-8 -*-
  5.  
  6. import scrapy
  7.  
  8.  
  9. class FundItem(scrapy.Item):
  10. stock = scrapy.Field() # 股票名称
  11. deadline = scrapy.Field() # 截止日期
  12. fund = scrapy.Field() # 基金名称
  13. code = scrapy.Field() # 基金代码
  14. count = scrapy.Field() # 持仓数量(股)
  15. percentcir = scrapy.Field() # 占流通股比例(%)
  16. svalue = scrapy.Field() # 持股市值(元)
  17. percentnet = scrapy.Field() # 占净值比例(%)
  18.  
  19. // File: myscrapy/pipelines.py
  20. ..
  21. # -*- coding: utf-8 -*-
  22.  
  23. from twisted.enterprise import adbapi
  24. from scrapy.utils.project import get_project_settings
  25.  
  26. settings = get_project_settings()  # Project settings, loaded once at import time; FundPipeline.__init__ reads DB_SERVER and DB_CONNECT from them.
  27.  
  28.  
  29. class FundPipeline(object):
  30. # The table you items.FundItem class map to, my table is named fund
  31. insert_sql = """insert into fund (%s) values ( %s )"""
  32.  
  33. def __init__(self):
  34. dbargs = settings.get('DB_CONNECT')
  35. db_server = settings.get('DB_SERVER')
  36. dbpool = adbapi.ConnectionPool(db_server, **dbargs)
  37. self.dbpool = dbpool
  38.  
  39. def __del__(self):
  40. self.dbpool.close()
  41.  
  42. def process_item(self, item, spider):
  43. self.insert_data(item, self.insert_sql)
  44. return item
  45.  
  46. def insert_data(self, item, insert):
  47. keys = item.fields.keys()
  48. fields = u','.join(keys)
  49. qm = u','.join([u'%s'] * len(keys))
  50. sql = insert % (fields, qm)
  51. data = [item[k] for k in keys]
  52. return self.dbpool.runOperation(sql, data)
  53.  
  54.  
  55. // File: myscrapy/settings.py
  56. ..
  57. # -*- coding: utf-8 -*-
  58. ...
  59.  
  60. ITEM_PIPELINES = {
  61. 'myscrapy.pipelines.FundPipeline': 500,
  62. }
  63.  
  64. DB_SERVER = 'MySQLdb' # For detail, please see twisted doc
  65. DB_CONNECT = {
  66. 'db': 'finance', # Your db
  67. 'user': 'root', #
  68. 'passwd': 'root', #
  69. 'host': '10.19.80.197', # Your Server
  70. 'charset': 'utf8',
  71. 'use_unicode': True,
  72. }
  73. ...

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.