Return to Snippet

Revision: 17075
at August 22, 2009 06:17 by birdspider


Updated Code
# fetches the first page of deviantart, extracts the fullsize image links (~ 26 pieces) and writes them to an aria2c job file
# © Patrik Plihal; patrik.plihal gmx at
# license: GPL

import urllib.request
import re

savefile = 'aria2c_job'

#target = "http://www.deviantart.com" #main site
target = "http://browse.deviantart.com/traditional/?order=24" # category 'traditional'

# Raw bytes literal so the regex escape (\.) reaches the regex engine
# untouched instead of relying on invalid string escapes such as "\/".
IMAGE_LINK_RE = re.compile(rb'src="(http://fc[^"]*\.jpg)"')


def extract_fullsize_links(html):
    """Return the fullsize .jpg links found in a page.

    Thumbnail URLs are rewritten in the whole document first: the
    ``http://th`` host prefix becomes ``http://fc`` and the ``/150/`` path
    segment is dropped. Every ``src="..."`` attribute that then points to an
    ``http://fc...jpg`` URL is collected.

    html -- raw page content as bytes.
    Returns a list of bytes URLs in document order (empty if none match).
    """
    # calculate fullscreen
    html = html.replace(b'http://th', b'http://fc')
    html = html.replace(b'/150/', b'/')

    # filter images
    return IMAGE_LINK_RE.findall(html)


def main():
    """Download the target page and write one image link per line to savefile."""
    # query html; close the connection even if reading fails
    response = urllib.request.urlopen(target, timeout=5)
    try:
        content = response.read()
    finally:
        response.close()

    imgs = extract_fullsize_links(content)

    # save
    print("fetching %s image links from deviantart..." % len(imgs))
    with open(savefile, mode='wb') as job:
        job.writelines(img + b"\n" for img in imgs)
    print('saving to "' + savefile + '"')
    print('done')


# Only hit the network when run as a script, not when imported.
if __name__ == "__main__":
    main()

Revision: 17074
at August 22, 2009 06:17 by birdspider


Updated Code
# fetches the first page of deviantart, extracts the fullsize image links (~ 26 pieces) and writes them to an aria2c job file
# © Patrik Plihal; patrik.plihal gmx at
# license: GPL

import urllib.request
import re

savefile = 'aria2c_job'

#target = "http://www.deviantart.com" #main site
target = "http://browse.deviantart.com/traditional/?order=24" # category 'traditional'

# Raw bytes literal: keeps the regex escape intact and avoids the invalid
# "\/" string escapes that newer Python versions warn about.
IMAGE_LINK_RE = re.compile(rb'src="(http://fc[^"]*\.jpg)"')


def extract_fullsize_links(html):
    """Collect fullsize .jpg links from page bytes.

    Rewrites ``http://th`` to ``http://fc`` and removes ``/150/`` path
    segments across the whole document, then returns every URL captured
    from a ``src="http://fc...jpg"`` attribute.

    html -- page content as bytes.
    Returns a list of bytes URLs in the order they appear.
    """
    # calculate fullscreen
    rewritten = html.replace(b'http://th', b'http://fc').replace(b'/150/', b'/')

    # filter images
    return IMAGE_LINK_RE.findall(rewritten)


def main():
    """Fetch the target page and save the extracted links, one per line."""
    # query html; the finally clause releases the connection on any outcome
    response = urllib.request.urlopen(target, timeout=5)
    try:
        content = response.read()
    finally:
        response.close()

    imgs = extract_fullsize_links(content)

    # save
    print("fetching %s image links from deviantart..." % len(imgs))
    with open(savefile, mode='wb') as job:
        job.writelines(img + b"\n" for img in imgs)
    print('saving to "' + savefile + '"')
    print('done')


# Run the download only when executed directly.
if __name__ == "__main__":
    main()

Revision: 17073
at August 22, 2009 06:14 by birdspider


Initial Code
# fetches the first page of deviantart, extracts the fullsize image links (~ 26 pieces) and writes them to an aria2c job file
# © Patrik Plihal; patrik.plihal gmx at
# license: GPL

import urllib.request
import re

savefile = 'aria2c_job'

#target = "http://www.deviantart.com" #main site
target = "http://browse.deviantart.com/traditional/?order=24" # category 'traditional'

# Raw bytes literal so no invalid string escapes ("\/") are needed and the
# regex sees exactly the pattern written here.
IMAGE_LINK_RE = re.compile(rb'src="(http://fc[^"]*\.jpg)"')


def extract_fullsize_links(html):
    """Return fullsize .jpg URLs referenced by ``src`` attributes in html.

    Before matching, every ``http://th`` prefix is replaced with
    ``http://fc`` and every ``/150/`` segment is collapsed to ``/``.

    html -- page content as bytes.
    Returns a list of bytes URLs, possibly empty.
    """
    # calculate fullscreen
    html = html.replace(b'http://th', b'http://fc')
    html = html.replace(b'/150/', b'/')

    # filter images
    return IMAGE_LINK_RE.findall(html)


def main():
    """Fetch the target page and write each extracted link on its own line."""
    # query html; always close the response, even when read() raises
    response = urllib.request.urlopen(target, timeout=5)
    try:
        content = response.read()
    finally:
        response.close()

    imgs = extract_fullsize_links(content)

    # save
    print("fetching %s image links from deviantart..." % len(imgs))
    with open(savefile, mode='wb') as job:
        job.writelines(img + b"\n" for img in imgs)
    print('saving to ' + savefile)
    print('done')


# Keep importing this module free of network and file side effects.
if __name__ == "__main__":
    main()

Initial URL

                                

Initial Description
untested and will probably destroy your computer

coded for python3.1

Initial Title
deviant art image mini-crawler

Initial Tags

                                

Initial Language
Python