Mp3 downloader from feeds - Python Snipplr Social Repository

Revision: 7562

at July 29, 2008 21:57 by panquetofobia

Updated Code

#!/usr/bin/python
"""
@name Music Downloader
@description This script downloads all mp3 links from given rss/atom feeds.
I wrote this to learn python so I'm sure there are better ways to do this.
It works on Mac OS X Leopard and requires wget to be installed.
__Add your feed's uris at the bottom__

@author [email protected]



------------------------------------------------------------------------------

Usage: $ python downloader.py [-t]
Options:
      -t  Shows the links retrieved from all feeds. No download takes place.

TODO:
- logging
- file size restriction support
- multithread support
- links as swf player param
"""
import sqlite3
import feedparser
import urllib
import urlparse
import os
import sys
import re

class Downloader:
    """Collect mp3 links from RSS/Atom feeds and fetch them with wget.

    All of the work is driven from the constructor: it opens (and if
    necessary initialises) the ``downloader.db`` SQLite file in the current
    directory, parses every feed, gathers the mp3 links and then either
    lists them (``-t`` on the command line) or downloads them.

    Attributes set by the constructor:
        userAgent   -- User-Agent string passed to wget.
        conn        -- sqlite3 connection holding the ``downloaded`` table.
        downloadDir -- directory the files are saved to (cwd at start-up).
        links       -- list of mp3 URLs gathered from all feeds.
        feedData    -- result of feedparser.parse() for the current feed.
    """

    # Anchor tags carrying an absolute http/https href; group 1 is the URL.
    _RE_LINKS = re.compile('<a[^>]+href="(https?:[^"]+)"[^>]*>')
    # A URL is treated as a track when its lower-cased form ends in "mp3".
    _RE_MP3 = re.compile('mp3$')

    def __init__(self, feeds):
        """Parse every feed, then list or download the links found.

        feeds -- list of feed URIs, each handed to feedparser.parse().

        Raises Exception when ``feeds`` is not a list.
        """
        if not isinstance(feeds, list):
            raise Exception('A list of blogs is needed')
        self.userAgent = 'Mozilla/5.0 (X11; U; Linux i686; en-US;rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)'
        self.conn = sqlite3.connect('downloader.db')
        self.downloadDir = os.getcwd()

        # Create the bookkeeping table the first time the script is run.
        c = self.conn.cursor()
        sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='downloaded'"
        if c.execute(sql).fetchone() is None:
            sql = 'CREATE TABLE downloaded(id INTEGER PRIMARY KEY, remoteFile TEXT, datetime DATE_TIME)'
            c.execute(sql)
            self.conn.commit()

        self.links = []
        for feed in feeds:
            self.feedData = feedparser.parse(feed)
            self.addLinks(feed)

        # No argument: download.  "-t" as first argument: only list the
        # links.  Any other first argument does neither.
        args = sys.argv[1:]
        if not args:
            print('%i tracks will be downloaded' % len(self.links))
            print('')
            self.downloadFiles()
        elif args[0] == '-t':
            for link in self.links:
                print(link)

    def addLinks(self, feed = ''):
        """Add the mp3 links of the feed in self.feedData to self.links.

        feed -- URI of the feed, used only for the progress message.

        Feeds whose version starts with neither "atom" nor "rss" add nothing.
        """
        # Guard against a result without a usable version so that one bad
        # feed does not abort the whole run (presumably what the parser
        # leaves behind when a feed cannot be read -- confirm against the
        # feedparser documentation).
        version = getattr(self.feedData, 'version', '') or ''
        print('Adding links in ' + version + ' feed in ' + feed)
        if version.startswith('atom'):
            self.__addAtomLinks()
        elif version.startswith('rss'):
            self.__addRssLinks()

    def __addAtomLinks(self):
        """Scan the HTML of every Atom entry for anchors pointing at mp3s."""
        fragments = []
        for entry in self.feedData.entries:
            # Entries without a content element have nothing to scan.
            content = getattr(entry, 'content', None)
            if not content:
                continue
            fragments.append(content[0]['value'])
        for link in self._RE_LINKS.findall(''.join(fragments)):
            if self._RE_MP3.search(link.lower()):
                self.links.append(link)

    def __addRssLinks(self):
        """Collect the href of every audio/mpeg enclosure in the RSS feed."""
        for entry in self.feedData.entries:
            for enclosure in getattr(entry, 'enclosures', None) or []:
                # Enclosures lacking a type or an href are ignored instead
                # of raising AttributeError.
                href = getattr(enclosure, 'href', None)
                if getattr(enclosure, 'type', None) == 'audio/mpeg' and href:
                    self.links.append(href)

    def isDownloaded(self, remoteFile):
        """Return True when remoteFile is already recorded in the database.

        remoteFile -- last path segment of the link, still URL-quoted.
        """
        c = self.conn.cursor()
        sql = "SELECT id FROM downloaded WHERE remoteFile = ?"
        return c.execute(sql, (remoteFile, )).fetchone() is not None

    def downloadFiles(self):
        """Download every link in self.links that is not already present.

        A link is skipped when its file name is recorded in the database or
        a file of that name already exists in self.downloadDir.  A file is
        recorded as downloaded only when wget exits with status 0, so failed
        downloads are retried on the next run.
        """
        # Local import: keeps this class independent of the module's
        # import list.
        import subprocess

        downloaded = 0
        c = self.conn.cursor()
        for link in self.links:
            remoteFile = urlparse.urlsplit(link)[2].split('/')[-1]
            localFileName = urllib.unquote(remoteFile)
            localFile = self.downloadDir + "/" + localFileName
            if self.isDownloaded(remoteFile) or os.path.exists(localFile):
                self.skippingMessage(localFileName)
                continue
            print('')
            print('Downloading ' + localFileName + ' ... ')
            # The link comes from a remote feed, so it must never reach a
            # shell: pass an argument list, and put "--" before the URL so
            # that it cannot be read as a wget option.
            command = ['wget', '--tries=2', '--timeout=10',
                       '--user-agent=' + self.userAgent, '--', link]
            try:
                status = subprocess.call(command, cwd=self.downloadDir)
            except OSError:
                # The process could not be started at all; trying the
                # remaining links would fail the same way.
                print('wget could not be started; is it installed?')
                break
            if status != 0:
                print('------------------------[ F A I L ]--------------------------')
                continue
            sql = 'INSERT INTO downloaded (remoteFile, datetime) VALUES(?, DATETIME("NOW"))'
            c.execute(sql, (remoteFile, ))
            self.conn.commit()
            downloaded += 1
            print('------------------------[ O K %i ]--------------------------' % downloaded)

    def skippingMessage(self, remoteFile):
        """Report that remoteFile is being skipped."""
        print('File ' + remoteFile + ' alredy exists')
        print('------------------------[  S K I P ]--------------------------')
       
if __name__ == '__main__':
    # Feeds to scan -- add your own feed URIs to this list.
    feeds = [
        'http://oldbluebus.blogspot.com/atom.xml',
        'http://ravensingstheblues.blogspot.com/atom.xml',
    ]
    # Constructing the Downloader is what starts the whole run.
    Downloader(feeds)

Revision: 7561

at July 29, 2008 21:54 by panquetofobia

Initial Code

#!/usr/bin/python
"""
@name Music Downloader
@description This script downloads all mp3 links from given rss/atom feeds.
I wrote this to learn python so I'm sure there are better ways to do this.
It works on Mac OS X Leopard and depends on wget.
__Add your feed's uris at the bottom__

@author [email protected]



------------------------------------------------------------------------------

Usage: $ python downloader.py [-t]
Options:
      -t  Shows the links retrieved from all feeds. No download takes place.

TODO:
- logging
- file size restriction support
- multithread support
- links as swf player param
"""
import sqlite3
import feedparser
import urllib
import urlparse
import os
import sys
import re

class Downloader:
    """Collect mp3 links from RSS/Atom feeds and fetch them with wget.

    All of the work is driven from the constructor: it opens (and if
    necessary initialises) the ``downloader.db`` SQLite file in the current
    directory, parses every feed, gathers the mp3 links and then either
    lists them (``-t`` on the command line) or downloads them.

    Attributes set by the constructor:
        userAgent   -- User-Agent string passed to wget.
        conn        -- sqlite3 connection holding the ``downloaded`` table.
        downloadDir -- directory the files are saved to (cwd at start-up).
        links       -- list of mp3 URLs gathered from all feeds.
        feedData    -- result of feedparser.parse() for the current feed.
    """

    # Anchor tags carrying an absolute http/https href; group 1 is the URL.
    _RE_LINKS = re.compile('<a[^>]+href="(https?:[^"]+)"[^>]*>')
    # A URL is treated as a track when its lower-cased form ends in "mp3".
    _RE_MP3 = re.compile('mp3$')

    def __init__(self, feeds):
        """Parse every feed, then list or download the links found.

        feeds -- list of feed URIs, each handed to feedparser.parse().

        Raises Exception when ``feeds`` is not a list.
        """
        if not isinstance(feeds, list):
            raise Exception('A list of blogs is needed')
        self.userAgent = 'Mozilla/5.0 (X11; U; Linux i686; en-US;rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)'
        self.conn = sqlite3.connect('downloader.db')
        self.downloadDir = os.getcwd()

        # Create the bookkeeping table the first time the script is run.
        c = self.conn.cursor()
        sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='downloaded'"
        if c.execute(sql).fetchone() is None:
            sql = 'CREATE TABLE downloaded(id INTEGER PRIMARY KEY, remoteFile TEXT, datetime DATE_TIME)'
            c.execute(sql)
            self.conn.commit()

        self.links = []
        for feed in feeds:
            self.feedData = feedparser.parse(feed)
            self.addLinks(feed)

        # No argument: download.  "-t" as first argument: only list the
        # links.  Any other first argument does neither.
        args = sys.argv[1:]
        if not args:
            print('%i tracks will be downloaded' % len(self.links))
            print('')
            self.downloadFiles()
        elif args[0] == '-t':
            for link in self.links:
                print(link)

    def addLinks(self, feed = ''):
        """Add the mp3 links of the feed in self.feedData to self.links.

        feed -- URI of the feed, used only for the progress message.

        Feeds whose version starts with neither "atom" nor "rss" add nothing.
        """
        # Guard against a result without a usable version so that one bad
        # feed does not abort the whole run (presumably what the parser
        # leaves behind when a feed cannot be read -- confirm against the
        # feedparser documentation).
        version = getattr(self.feedData, 'version', '') or ''
        print('Adding links in ' + version + ' feed in ' + feed)
        if version.startswith('atom'):
            self.__addAtomLinks()
        elif version.startswith('rss'):
            self.__addRssLinks()

    def __addAtomLinks(self):
        """Scan the HTML of every Atom entry for anchors pointing at mp3s."""
        fragments = []
        for entry in self.feedData.entries:
            # Entries without a content element have nothing to scan.
            content = getattr(entry, 'content', None)
            if not content:
                continue
            fragments.append(content[0]['value'])
        for link in self._RE_LINKS.findall(''.join(fragments)):
            if self._RE_MP3.search(link.lower()):
                self.links.append(link)

    def __addRssLinks(self):
        """Collect the href of every audio/mpeg enclosure in the RSS feed."""
        for entry in self.feedData.entries:
            for enclosure in getattr(entry, 'enclosures', None) or []:
                # Enclosures lacking a type or an href are ignored instead
                # of raising AttributeError.
                href = getattr(enclosure, 'href', None)
                if getattr(enclosure, 'type', None) == 'audio/mpeg' and href:
                    self.links.append(href)

    def isDownloaded(self, remoteFile):
        """Return True when remoteFile is already recorded in the database.

        remoteFile -- last path segment of the link, still URL-quoted.
        """
        c = self.conn.cursor()
        sql = "SELECT id FROM downloaded WHERE remoteFile = ?"
        return c.execute(sql, (remoteFile, )).fetchone() is not None

    def downloadFiles(self):
        """Download every link in self.links that is not already present.

        A link is skipped when its file name is recorded in the database or
        a file of that name already exists in self.downloadDir.  A file is
        recorded as downloaded only when wget exits with status 0, so failed
        downloads are retried on the next run.
        """
        # Local import: keeps this class independent of the module's
        # import list.
        import subprocess

        downloaded = 0
        c = self.conn.cursor()
        for link in self.links:
            remoteFile = urlparse.urlsplit(link)[2].split('/')[-1]
            localFileName = urllib.unquote(remoteFile)
            localFile = self.downloadDir + "/" + localFileName
            if self.isDownloaded(remoteFile) or os.path.exists(localFile):
                self.skippingMessage(localFileName)
                continue
            print('')
            print('Downloading ' + localFileName + ' ... ')
            # The link comes from a remote feed, so it must never reach a
            # shell: pass an argument list, and put "--" before the URL so
            # that it cannot be read as a wget option.
            command = ['wget', '--tries=2', '--timeout=10',
                       '--user-agent=' + self.userAgent, '--', link]
            try:
                status = subprocess.call(command, cwd=self.downloadDir)
            except OSError:
                # The process could not be started at all; trying the
                # remaining links would fail the same way.
                print('wget could not be started; is it installed?')
                break
            if status != 0:
                print('------------------------[ F A I L ]--------------------------')
                continue
            sql = 'INSERT INTO downloaded (remoteFile, datetime) VALUES(?, DATETIME("NOW"))'
            c.execute(sql, (remoteFile, ))
            self.conn.commit()
            downloaded += 1
            print('------------------------[ O K %i ]--------------------------' % downloaded)

    def skippingMessage(self, remoteFile):
        """Report that remoteFile is being skipped."""
        print('File ' + remoteFile + ' alredy exists')
        print('------------------------[  S K I P ]--------------------------')
       
if __name__ == '__main__':
    # Feeds to scan -- add your own feed URIs to this list.
    feeds = [
        'http://oldbluebus.blogspot.com/atom.xml',
        'http://ravensingstheblues.blogspot.com/atom.xml',
    ]
    # Constructing the Downloader is what starts the whole run.
    Downloader(feeds)

Initial URL

Initial Description

This script downloads all mp3 links from given rss/atom feeds.
I wrote this to learn python so I'm sure there are better ways to do this.
Depends on wget.

Initial Title

Mp3 downloader from feeds

Initial Tags

python

Initial Language

Python

Choose a language for easy browsing: