Revision: 59320
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at September 1, 2012 07:15 by scrapy
Initial Code
# Gather global stats and send them via email / print them to stdout when Scrapy is done.
"""
StatsDump extension sends an email and prints stats to stdout when an engine finishes scraping.
Use STATSMAILER_RCPTS setting to give the recipient mail address
"""
from scrapy.xlib.pydispatch import dispatcher
from datetime import datetime
from scrapy.stats import stats
from scrapy import signals
from scrapy.mail import MailSender
from scrapy.conf import settings
from pprint import pprint
class StatsDump(object):
    """Record engine start/finish times and report the global stats.

    On construction the extension reads the ``STATSMAILER_RCPTS`` setting and
    connects its handlers to the ``engine_started`` and ``engine_stopped``
    signals. When the engine stops, timing values are stored in the global
    stats, the stats are emailed to the configured recipients (if any) and
    pretty-printed to stdout.
    """

    # Class-level default so ``engine_stopped`` can tell that no start time
    # was recorded instead of failing with AttributeError.
    start_time = None

    def __init__(self):
        """Load the recipient list and subscribe to the engine signals."""
        self.recipients = settings.getlist("STATSMAILER_RCPTS")
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)
        dispatcher.connect(self.engine_started, signals.engine_started)

    @staticmethod
    def _format_body(stats_dict):
        """Return the email body: a header plus one ``key : value`` line per stat."""
        lines = ["%-50s : %s" % (key, value) for key, value in stats_dict.items()]
        return "Global stats\n\n" + "\n".join(lines)

    def engine_started(self):
        """Remember the moment the engine started."""
        self.start_time = datetime.now()

    def engine_stopped(self):
        """Store timing stats, mail them if recipients are set, and print them."""
        now_time = datetime.now()
        started_at = self.start_time

        # The start-dependent values are only meaningful when a start time
        # was actually recorded; the finish time is always known.
        if started_at is not None:
            stats.set_value('start_time', str(started_at))
        stats.set_value('finish_time', str(now_time))
        if started_at is not None:
            stats.set_value('total_time', str(now_time - started_at))

        collected = stats.get_stats()
        if self.recipients:
            mail = MailSender()
            mail.send(self.recipients, "Scrapy stats", self._format_body(collected))
        pprint(collected)
# Snippet imported from snippets.scrapy.org (which no longer works)
# author: dchaplinsky
# date : Oct 07, 2011
Initial URL
Initial Description
Initial Title
My approach to stats extension
Initial Tags
Initial Language
Python