Return to Snippet

Revision: 1791
at November 10, 2006 00:03 by whitetiger


Updated Code
import os
import random
import re
import urllib
import urllib2

class googleImages(object):
    '''
    Collect image URLs from one Google Images result page and download them.

    The URLs found are kept as the keys of self.imagesURLs (every value is 0;
    the dict is only used to drop duplicates).
    '''

    # Captures the still URL-encoded value of the "imgurl" query parameter.
    RE_IMAGEURL = re.compile('imgurl=(http://.+?)&', re.DOTALL | re.IGNORECASE)

    # Each letter appears twice, so a letter is drawn twice as often as a digit.
    RANDOM_CHARSET = 'abcdefghijklmnopqrstuvwxyz'*2 + '0123456789'

    # Upper bound on the number of bytes read from the result page.
    MAX_PAGE_BYTES = 500000

    def __init__(self):
        self.imagesURLs = {}

    def getRandomImages(self, imageName=None):
        '''
        Search Google Images and remember the image URLs of one result page.

        imageName -- search term; when None, a random word produced by
                     _randomWords() is searched instead.

        The page requested starts at a random offset (a multiple of 10 from
        0 to 500). Every URL found is added as a key of self.imagesURLs.
        The lookup is best effort: when the request fails nothing is added
        and no exception is raised.
        '''
        if imageName is None:
            imageName = self._randomWords()

        # Escape the term so that spaces, '&', '#', ... cannot break the query.
        requestURL = 'http://images.google.it/images?q=%s&hl=it&start=%d' % (
            urllib.quote_plus(imageName), random.randint(0, 50) * 10)
        requestHeaders = {'User-Agent': 'googleImages/1.0'}

        htmlPage = ''
        try:
            request = urllib2.Request(requestURL, None, requestHeaders)
            response = urllib2.urlopen(request)
            try:
                htmlPage = response.read(googleImages.MAX_PAGE_BYTES)
            finally:
                # Always release the connection, even when read() fails.
                response.close()
        except IOError:
            # urllib2.URLError and HTTPError derive from IOError. Unlike a
            # bare "except:", this lets KeyboardInterrupt/SystemExit through.
            pass

        for imageURL in self._extractImageURLs(htmlPage):
            self.imagesURLs[imageURL] = 0

    def _extractImageURLs(self, htmlPage):
        '''
        Return the decoded image URLs referenced by htmlPage, in page order.
        '''
        imageURLs = []
        for image in googleImages.RE_IMAGEURL.findall(htmlPage):
            imageURL = urllib.unquote_plus(image)
            # RE_IMAGEURL ignores case, so the scheme test must do so too;
            # otherwise "HTTP://host/x" would become "http://HTTP://host/x".
            if not imageURL.lower().startswith('http://'):
                imageURL = 'http://' + imageURL
            imageURLs.append(imageURL)
        return imageURLs

    def _randomWords(self):
        '''
        Return a random word of 2 to 7 characters taken from RANDOM_CHARSET.
        '''
        length = random.randint(2, 7)
        return ''.join([random.choice(googleImages.RANDOM_CHARSET)
                        for _ in range(length)])

    def _localFileName(self, imageURL, fallback):
        '''
        Return the file name to store imageURL under.

        This is the last path segment of the URL with query string and
        fragment removed. fallback is returned when the URL has no usable
        last segment (no path, a path ending in "/", or "." / "..").
        '''
        withoutQuery = imageURL.split('#', 1)[0].split('?', 1)[0]
        hostAndPath = withoutQuery.split('://', 1)[-1]
        if '/' not in hostAndPath:
            # Host only: there is no path to take a name from.
            return fallback
        fileName = hostAndPath.rsplit('/', 1)[-1]
        if fileName in ('', '.', '..'):
            return fallback
        return fileName

    def downloadImages(self, directory='googleIMGs'):
        '''
        Download every collected image into directory (default "googleIMGs").

        directory -- target folder; it is created when it does not exist.

        One progress line is printed per image. A download that fails is
        reported and skipped, so the remaining images are still fetched.
        '''
        if not self.imagesURLs:
            return

        if not os.path.isdir(directory):
            os.makedirs(directory)

        numberIMGs = len(self.imagesURLs)
        posIMGs = 0

        for imageURL in self.imagesURLs:
            posIMGs += 1
            print('[%d/%d] - %s' % (posIMGs, numberIMGs, imageURL))
            fileName = self._localFileName(imageURL, 'image_%d' % posIMGs)
            try:
                urllib.urlretrieve(imageURL, os.path.join(directory, fileName))
            except IOError:
                print('    download failed, skipped')
    
if __name__ == '__main__':
    # Demo run: collect image URLs for a random search term, then fetch them.
    fetcher = googleImages()
    fetcher.getRandomImages()
    fetcher.downloadImages()

    print('Finito...')

Revision: 1790
at November 9, 2006 23:55 by whitetiger


Initial Code
import os
import random
import re
import urllib
import urllib2

class googleImages(object):
    '''
    Collect image URLs from one Google Images result page and download them.

    The URLs found are kept as the keys of self.imagesURLs (every value is 0;
    the dict is only used to drop duplicates).
    '''

    # Captures the still URL-encoded value of the "imgurl" query parameter.
    RE_IMAGEURL = re.compile('imgurl=(http://.+?)&', re.DOTALL | re.IGNORECASE)

    # Each letter appears twice, so a letter is drawn twice as often as a digit.
    RANDOM_CHARSET = 'abcdefghijklmnopqrstuvwxyz'*2 + '0123456789'

    # Upper bound on the number of bytes read from the result page.
    MAX_PAGE_BYTES = 500000

    def __init__(self):
        self.imagesURLs = {}

    def getRandomImages(self, imageName=None):
        '''
        Search Google Images and remember the image URLs of one result page.

        imageName -- search term; when None, a random word produced by
                     _randomWords() is searched instead.

        The page requested starts at a random offset (a multiple of 10 from
        0 to 500). Every URL found is added as a key of self.imagesURLs.
        The lookup is best effort: when the request fails nothing is added
        and no exception is raised.
        '''
        if imageName is None:
            imageName = self._randomWords()

        # Escape the term so that spaces, '&', '#', ... cannot break the query.
        requestURL = 'http://images.google.it/images?q=%s&hl=it&start=%d' % (
            urllib.quote_plus(imageName), random.randint(0, 50) * 10)
        requestHeaders = {'User-Agent': 'googleImages/1.0'}

        htmlPage = ''
        try:
            request = urllib2.Request(requestURL, None, requestHeaders)
            response = urllib2.urlopen(request)
            try:
                htmlPage = response.read(googleImages.MAX_PAGE_BYTES)
            finally:
                # Always release the connection, even when read() fails.
                response.close()
        except IOError:
            # urllib2.URLError and HTTPError derive from IOError. Unlike a
            # bare "except:", this lets KeyboardInterrupt/SystemExit through.
            pass

        for imageURL in self._extractImageURLs(htmlPage):
            self.imagesURLs[imageURL] = 0

    def _extractImageURLs(self, htmlPage):
        '''
        Return the decoded image URLs referenced by htmlPage, in page order.
        '''
        imageURLs = []
        for image in googleImages.RE_IMAGEURL.findall(htmlPage):
            imageURL = urllib.unquote_plus(image)
            # RE_IMAGEURL ignores case, so the scheme test must do so too;
            # otherwise "HTTP://host/x" would become "http://HTTP://host/x".
            if not imageURL.lower().startswith('http://'):
                imageURL = 'http://' + imageURL
            imageURLs.append(imageURL)
        return imageURLs

    def _randomWords(self):
        '''
        Return a random word of 2 to 7 characters taken from RANDOM_CHARSET.
        '''
        length = random.randint(2, 7)
        return ''.join([random.choice(googleImages.RANDOM_CHARSET)
                        for _ in range(length)])

    def _localFileName(self, imageURL, fallback):
        '''
        Return the file name to store imageURL under.

        This is the last path segment of the URL with query string and
        fragment removed. fallback is returned when the URL has no usable
        last segment (no path, a path ending in "/", or "." / "..").
        '''
        withoutQuery = imageURL.split('#', 1)[0].split('?', 1)[0]
        hostAndPath = withoutQuery.split('://', 1)[-1]
        if '/' not in hostAndPath:
            # Host only: there is no path to take a name from.
            return fallback
        fileName = hostAndPath.rsplit('/', 1)[-1]
        if fileName in ('', '.', '..'):
            return fallback
        return fileName

    def downloadImages(self, directory='googleIMGs'):
        '''
        Download every collected image into directory (default "googleIMGs").

        directory -- target folder; it is created when it does not exist.

        One progress line is printed per image. A download that fails is
        reported and skipped, so the remaining images are still fetched.
        '''
        if not self.imagesURLs:
            return

        if not os.path.isdir(directory):
            os.makedirs(directory)

        numberIMGs = len(self.imagesURLs)
        posIMGs = 0

        for imageURL in self.imagesURLs:
            posIMGs += 1
            print('[%d/%d] - %s' % (posIMGs, numberIMGs, imageURL))
            fileName = self._localFileName(imageURL, 'image_%d' % posIMGs)
            try:
                urllib.urlretrieve(imageURL, os.path.join(directory, fileName))
            except IOError:
                print('    download failed, skipped')
    
if __name__ == '__main__':
    # Demo run: collect image URLs for a random search term, then fetch them.
    fetcher = googleImages()
    fetcher.getRandomImages()
    fetcher.downloadImages()

    print('Finito...')

Initial URL


Initial Description


Initial Title
Python - randomGoogle

Initial Tags
regex, image, google, python, web

Initial Language
Python