Revision: 59338
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at September 1, 2012 07:15 by scrapy
Initial Code
# Description
# -----------
#
# This is a Spider Queue that uses a plain text file as backend, storing one spider name per line.
#
# Usage example
# -------------
#
# First start scrapy in server mode:
#
# scrapy runserver --set SPIDER_QUEUE_CLASS=path.to.TextFileSpiderQueue
#
# Then add spiders to crawl from the shell with:
#
# $ echo myspider >>queue.txt
#
# It also works with the Scrapy `queue` command:
#
# $ scrapy queue add myspider
# Added: name=pwc_sg args={}
#
# Limitations
# -----------
#
# * It doesn't support spider arguments
#
# * It's not concurrency safe.
import os
from zope.interface import implements
from scrapy.interfaces import ISpiderQueue
class TextFileSpiderQueue(object):
implements(ISpiderQueue)
FILE = 'queue.txt'
@classmethod
def from_settings(cls, settings):
return cls()
def add(self, name, **spider_args):
with open(self.FILE, 'a') as f:
f.write(name + os.linesep)
def pop(self):
msgs = list(open(self.FILE)) if os.path.exists(self.FILE) else []
if not msgs:
return
with open(self.FILE, 'w') as f:
f.writelines(msgs[1:])
return {'name': msgs[0].strip()}
def count(self):
return len(list(open(self.FILE)))
def list(self):
return [{'name': x.strip()} for x in open(self.FILE)]
def clear(self):
os.remove(self.FILE)
# Snippet imported from snippets.scrapy.org (which no longer works)
# author: pablo
# date : Sep 05, 2010
Initial URL
Initial Description
Initial Title
Text-file Spider Queue
Initial Tags
file
Initial Language
Python