Revision: 1791
Updated Code
at November 10, 2006 00:03 by whitetiger
Updated Code
# Fetch a random Google Images result page, collect the picture URLs it
# links to and download them into a local folder.
# Runs on Python 2.6+ and on Python 3.
import os
import posixpath
import random
import re
import string

try:  # Python 2 module layout
    from httplib import HTTPException
    from urllib import quote_plus, unquote_plus, urlretrieve
    from urllib2 import Request, urlopen
    from urlparse import urlsplit
except ImportError:  # Python 3 module layout
    from http.client import HTTPException
    from urllib.parse import quote_plus, unquote_plus, urlsplit
    from urllib.request import Request, urlopen, urlretrieve


class googleImages(object):
    """Collect image URLs from a Google Images result page and download them.

    Typical use: call getRandomImages() one or more times to fill
    ``imagesURLs``, then call downloadImages() to save the pictures.
    """

    # Captures the value of every ``imgurl=`` query parameter found in the page.
    RE_IMAGEURL = re.compile(r'imgurl=(http://.+?)&', re.DOTALL | re.IGNORECASE)

    # Letters are listed twice so that they are drawn twice as often as digits.
    _CHARSET = string.ascii_lowercase * 2 + string.digits

    # Folder (relative to the current working directory) that receives the files.
    DOWNLOAD_DIR = 'googleIMGs'

    def __init__(self):
        # Maps image URL -> 0. Only the keys matter; the dict de-duplicates URLs.
        self.imagesURLs = {}

    def getRandomImages(self, imageName=None):
        """Query Google Images and remember every image URL in the answer.

        imageName -- search term; when None a random word is generated.

        A random result offset (0, 10, ..., 500) is requested.  The lookup is
        best effort: if the page cannot be fetched, ``imagesURLs`` is simply
        left unchanged.  Returns None.
        """
        if imageName is None:
            imageName = self._randomWords()

        # The term is URL-encoded so that spaces, '&', '#' ... cannot corrupt
        # the query string.
        requestURL = 'http://images.google.it/images?q=%s&hl=it&start=%d' % (
            quote_plus(imageName), random.randint(0, 50) * 10)
        request = Request(requestURL, None, {'User-Agent': 'googleImages/1.0'})

        try:
            response = urlopen(request)
            try:
                htmlPage = response.read(500000)
            finally:
                response.close()
        except (IOError, HTTPException):
            # Network or HTTP failure: nothing to parse.
            return

        # Python 3 hands back bytes; the pattern is a text pattern.
        if not isinstance(htmlPage, str):
            htmlPage = htmlPage.decode('utf-8', 'replace')

        self._collectImageURLs(htmlPage)

    def _collectImageURLs(self, htmlPage):
        """Store every ``imgurl=`` target found in htmlPage in ``imagesURLs``."""
        for encodedURL in self.RE_IMAGEURL.findall(htmlPage):
            imageURL = unquote_plus(encodedURL)
            # The pattern is case-insensitive, so the scheme test must be too;
            # otherwise 'HTTP://host/x' would become 'http://HTTP://host/x'.
            if not imageURL.lower().startswith('http://'):
                imageURL = 'http://' + imageURL
            self.imagesURLs[imageURL] = 0

    def _randomWords(self):
        """Return a random word of 2 to 7 lowercase letters and digits."""
        length = random.randint(2, 7)
        return ''.join(random.choice(self._CHARSET) for _ in range(length))

    @staticmethod
    def _localName(imageURL, fallbackIndex):
        """Return a file name (never a path) under which imageURL is saved.

        The name is the last segment of the URL path, without query string or
        fragment.  When that segment is empty or a directory reference, the
        name 'image_<fallbackIndex>' is used instead.
        """
        fileName = posixpath.basename(urlsplit(imageURL).path)
        # The URL comes from a remote page: make sure a backslash cannot act
        # as a path separator on Windows.
        fileName = fileName.replace('\\', '_')
        if fileName in ('', '.', '..'):
            fileName = 'image_%d' % fallbackIndex
        return fileName

    def downloadImages(self):
        """Download every collected image into the DOWNLOAD_DIR folder.

        The folder is created when missing.  Progress is printed as
        '[position/total] - url'.  A download that fails is reported and
        skipped so that the remaining images are still fetched.  Images that
        share the same file name overwrite each other.
        """
        if not self.imagesURLs:
            return
        if not os.path.isdir(self.DOWNLOAD_DIR):
            os.makedirs(self.DOWNLOAD_DIR)

        total = len(self.imagesURLs)
        for index, imageURL in enumerate(self.imagesURLs):
            position = index + 1
            print('[%d/%d] - %s' % (position, total, imageURL))
            target = os.path.join(self.DOWNLOAD_DIR,
                                  self._localName(imageURL, position))
            try:
                urlretrieve(imageURL, target)
            except (IOError, HTTPException) as error:
                print('    download failed, skipped: %s' % (error,))


if __name__ == '__main__':
    test = googleImages()
    test.getRandomImages()
    test.downloadImages()
    print('Finito...')
Revision: 1790
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at November 9, 2006 23:55 by whitetiger
Initial Code
# Fetch a random Google Images result page, collect the picture URLs it
# links to and download them into a local folder.
# Runs on Python 2.6+ and on Python 3.
import os
import posixpath
import random
import re
import string

try:  # Python 2 module layout
    from httplib import HTTPException
    from urllib import quote_plus, unquote_plus, urlretrieve
    from urllib2 import Request, urlopen
    from urlparse import urlsplit
except ImportError:  # Python 3 module layout
    from http.client import HTTPException
    from urllib.parse import quote_plus, unquote_plus, urlsplit
    from urllib.request import Request, urlopen, urlretrieve


class googleImages(object):
    """Collect image URLs from a Google Images result page and download them.

    Typical use: call getRandomImages() one or more times to fill
    ``imagesURLs``, then call downloadImages() to save the pictures.
    """

    # Captures the value of every ``imgurl=`` query parameter found in the page.
    RE_IMAGEURL = re.compile(r'imgurl=(http://.+?)&', re.DOTALL | re.IGNORECASE)

    # Letters are listed twice so that they are drawn twice as often as digits.
    _CHARSET = string.ascii_lowercase * 2 + string.digits

    # Folder (relative to the current working directory) that receives the files.
    DOWNLOAD_DIR = 'googleIMGs'

    def __init__(self):
        # Maps image URL -> 0. Only the keys matter; the dict de-duplicates URLs.
        self.imagesURLs = {}

    def getRandomImages(self, imageName=None):
        """Query Google Images and remember every image URL in the answer.

        imageName -- search term; when None a random word is generated.

        A random result offset (0, 10, ..., 500) is requested.  The lookup is
        best effort: if the page cannot be fetched, ``imagesURLs`` is simply
        left unchanged.  Returns None.
        """
        if imageName is None:
            imageName = self._randomWords()

        # The term is URL-encoded so that spaces, '&', '#' ... cannot corrupt
        # the query string.
        requestURL = 'http://images.google.it/images?q=%s&hl=it&start=%d' % (
            quote_plus(imageName), random.randint(0, 50) * 10)
        request = Request(requestURL, None, {'User-Agent': 'googleImages/1.0'})

        try:
            response = urlopen(request)
            try:
                htmlPage = response.read(500000)
            finally:
                response.close()
        except (IOError, HTTPException):
            # Network or HTTP failure: nothing to parse.
            return

        # Python 3 hands back bytes; the pattern is a text pattern.
        if not isinstance(htmlPage, str):
            htmlPage = htmlPage.decode('utf-8', 'replace')

        self._collectImageURLs(htmlPage)

    def _collectImageURLs(self, htmlPage):
        """Store every ``imgurl=`` target found in htmlPage in ``imagesURLs``."""
        for encodedURL in self.RE_IMAGEURL.findall(htmlPage):
            imageURL = unquote_plus(encodedURL)
            # The pattern is case-insensitive, so the scheme test must be too;
            # otherwise 'HTTP://host/x' would become 'http://HTTP://host/x'.
            if not imageURL.lower().startswith('http://'):
                imageURL = 'http://' + imageURL
            self.imagesURLs[imageURL] = 0

    def _randomWords(self):
        """Return a random word of 2 to 7 lowercase letters and digits."""
        length = random.randint(2, 7)
        return ''.join(random.choice(self._CHARSET) for _ in range(length))

    @staticmethod
    def _localName(imageURL, fallbackIndex):
        """Return a file name (never a path) under which imageURL is saved.

        The name is the last segment of the URL path, without query string or
        fragment.  When that segment is empty or a directory reference, the
        name 'image_<fallbackIndex>' is used instead.
        """
        fileName = posixpath.basename(urlsplit(imageURL).path)
        # The URL comes from a remote page: make sure a backslash cannot act
        # as a path separator on Windows.
        fileName = fileName.replace('\\', '_')
        if fileName in ('', '.', '..'):
            fileName = 'image_%d' % fallbackIndex
        return fileName

    def downloadImages(self):
        """Download every collected image into the DOWNLOAD_DIR folder.

        The folder is created when missing.  Progress is printed as
        '[position/total] - url'.  A download that fails is reported and
        skipped so that the remaining images are still fetched.  Images that
        share the same file name overwrite each other.
        """
        if not self.imagesURLs:
            return
        if not os.path.isdir(self.DOWNLOAD_DIR):
            os.makedirs(self.DOWNLOAD_DIR)

        total = len(self.imagesURLs)
        for index, imageURL in enumerate(self.imagesURLs):
            position = index + 1
            print('[%d/%d] - %s' % (position, total, imageURL))
            target = os.path.join(self.DOWNLOAD_DIR,
                                  self._localName(imageURL, position))
            try:
                urlretrieve(imageURL, target)
            except (IOError, HTTPException) as error:
                print('    download failed, skipped: %s' % (error,))


if __name__ == '__main__':
    test = googleImages()
    test.getRandomImages()
    test.downloadImages()
    print('Finito...')
Initial URL
Initial Description
Initial Title
Python - randomGoogle
Initial Tags
regex, image, google, python, web
Initial Language
Python