Published in: Python
This script downloads all mp3 links from the given RSS/Atom feeds.
I wrote this to learn Python, so I'm sure there are better ways to do this.
It depends on wget.
#!/usr/bin/python
"""
@name Music Downloader
@description This script downloads all mp3 links from given rss/atom feeds.
    I wrote this to learn python so I'm sure there are better ways to do this.
    It works in Mac OSX Leopard WITH wget.
    __Add your feed's uris at the bottom__
@author [email protected]
------------------------------------------------------------------------------
Usage:
    $ python downloader.py [-t]

Options:
    -t    Shows the links retrieved from all feeds. No download takes place.

TODO:
    - logging
    - file size restriction support
    - multithread support
    - links as swf player param
"""
import sqlite3
import feedparser
import urllib
import urlparse
import os
import sys
import re


class Downloader:

    def __init__(self, feeds):
        if type(feeds) is not list:
            raise Exception('A list of blogs is needed')
        self.userAgent = 'Mozilla/5.0 (X11; U; Linux i686; en-US;rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)'
        self.conn = sqlite3.connect('downloader.db')
        self.downloadDir = os.getcwd()

        # Create the bookkeeping table on the first run.
        c = self.conn.cursor()
        sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='downloaded'"
        if c.execute(sql).fetchone() is None:
            sql = 'CREATE TABLE downloaded(id INTEGER PRIMARY KEY, remoteFile TEXT, datetime DATE_TIME)'
            c.execute(sql)
            self.conn.commit()

        # Collect mp3 links from every feed.
        self.links = []
        for feed in feeds:
            self.feedData = feedparser.parse(feed)
            self.addLinks(feed)

        # With -t, only list the links; otherwise download them.
        try:
            if sys.argv[1] == '-t':
                for i in self.links:
                    print i
        except IndexError:
            print '%i tracks will be downloaded' % len(self.links)
            self.downloadFiles()

    def addLinks(self, feed=''):
        re_atom = re.compile('^atom.*')
        re_rss = re.compile('^rss.*')
        print 'Adding links in ' + self.feedData.version + ' feed in ' + feed
        if re_atom.search(self.feedData.version):
            self.__addAtomLinks()
        elif re_rss.search(self.feedData.version):
            self.__addRssLinks()

    def __addAtomLinks(self):
        # Atom feeds: scrape mp3 hrefs out of the entries' HTML content.
        re_links = re.compile('<a[^>]+href="(http:[^"]+)"[^>]*>')
        re_mp3 = re.compile('mp3$')
        html = ''
        for entry in self.feedData.entries:
            # atom03
            html += entry.content[0]['value']
        links = re_links.findall(html)
        for link in links:
            if re_mp3.search(link.lower()):
                self.links.append(link)

    def __addRssLinks(self):
        # RSS feeds: take audio enclosures directly.
        for entry in self.feedData.entries:
            if hasattr(entry, 'enclosures'):
                for link in entry.enclosures:
                    if link.type == 'audio/mpeg':
                        self.links.append(link.href)

    def isDownloaded(self, remoteFile):
        c = self.conn.cursor()
        sql = "SELECT id FROM downloaded WHERE remoteFile = ?"
        return c.execute(sql, (remoteFile, )).fetchone() is not None

    def downloadFiles(self):
        i = 0
        c = self.conn.cursor()
        for link in self.links:
            #if i > 5: break
            remoteFile = urlparse.urlsplit(link)[2].split('/')[-1]
            localFileName = urllib.unquote(remoteFile)
            localFile = self.downloadDir + "/" + localFileName
            if not self.isDownloaded(remoteFile):
                if os.path.exists(localFile):
                    self.skippingMessage(localFileName)
                    continue
                print 'Downloading ' + localFileName + ' ... '
                os.system('wget --tries=2 --timeout=10 --user-agent="%s" "%s"' % (self.userAgent, link))
                sql = 'INSERT INTO downloaded (remoteFile, datetime) VALUES(?, DATETIME("NOW"))'
                c.execute(sql, (remoteFile, ))
                self.conn.commit()
                i += 1
                print '------------------------[ O K %i ]--------------------------' % i
            else:
                self.skippingMessage(localFileName)

    def skippingMessage(self, remoteFile):
        print 'File ' + remoteFile + ' already exists'
        print '------------------------[ S K I P ]--------------------------'


if __name__ == '__main__':
    feeds = [
        'http://oldbluebus.blogspot.com/atom.xml',
        'http://ravensingstheblues.blogspot.com/atom.xml'
    ]
    Downloader(feeds)
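A quick illustration of the RSS branch: feedparser exposes each feed's version string and any audio enclosures, which is all __addRssLinks needs. This is a standalone sketch with a placeholder feed URL, not part of the script itself:

    import feedparser

    # Placeholder URL -- substitute one of the feeds configured at the bottom of the script.
    d = feedparser.parse('http://example.com/atom.xml')
    print d.version  # e.g. 'rss20' or 'atom10'; addLinks branches on this prefix

    for entry in d.entries:
        # Podcast-style feeds list audio files as <enclosure> elements.
        for enclosure in getattr(entry, 'enclosures', []):
            if enclosure.type == 'audio/mpeg':
                print enclosure.href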
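The download itself shells out to wget via os.system, which is the only reason for the external dependency. If wget is unavailable, urllib.urlretrieve from the Python 2 standard library could stand in; the sketch below is an assumption rather than part of the original script, and it does not replicate wget's retry, timeout, or user-agent flags:

    import urllib

    def fetch(link, localFile):
        """Hypothetical wget-free download helper (illustrative only)."""
        try:
            urllib.urlretrieve(link, localFile)
            return True
        except IOError, e:
            print 'Failed to download %s: %s' % (link, e)
            return False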