Tumblr taglist generator


/ Published in: Python
Save to your folder(s)

The Tumblr taglist generator returns your tumblelog's tags in JSON format. It is written in Python and runs on Google App Engine.


Copy this code and paste it in your HTML
  1. import cgi
  2.  
  3. import md5
  4.  
  5. import time
  6.  
  7. import datetime
  8.  
  9. import math
  10.  
  11. from xml.dom import minidom
  12.  
  13. import urllib
  14.  
  15. import sys
  16.  
  17.  
  18.  
  19. from google.appengine.api import users
  20.  
  21. from google.appengine.ext import webapp
  22.  
  23. from google.appengine.ext.webapp.util import run_wsgi_app
  24.  
  25. from google.appengine.ext import db
  26.  
  27. from google.appengine.api import urlfetch
  28.  
  29. from django.utils import simplejson as json
  30.  
  31.  
  32.  
  33.  
  34.  
  35. # Database Classes
  36.  
  37. class TumblrCache(db.Model):
  38.  
  39. cache_id = db.StringProperty(required=True)
  40.  
  41. content = db.StringProperty(multiline=True)
  42.  
  43. date = db.DateTimeProperty(auto_now_add=True)
  44.  
  45.  
  46.  
  47. class Flush(webapp.RequestHandler):
  48.  
  49. def get(self):
  50.  
  51. self.response.headers['Content-Type'] = 'text/plain'
  52.  
  53. w = self.response.out
  54.  
  55.  
  56.  
  57. query = TumblrCache.all()
  58.  
  59. for q in query:
  60.  
  61. q.delete()
  62.  
  63.  
  64.  
  65. w.write('FLUSHED!')
  66.  
  67.  
  68.  
  69.  
  70.  
  71. class MainPage(webapp.RequestHandler):
  72.  
  73. def get(self):
  74.  
  75. self.response.headers['Content-Type'] = 'text/plain'
  76.  
  77.  
  78.  
  79. class TagCloud(webapp.RequestHandler):
  80.  
  81. def get(self):
  82.  
  83. self.response.headers['Content-Type'] = 'text/plain'
  84.  
  85. w = self.response.out
  86.  
  87. # Thu thap cac bien can thiet
  88.  
  89. tumblr_api = cgi.escape(self.request.get('url').encode('utf-8')) + '/api/read'
  90.  
  91. tumblr_api = 'http://im.doquangtu.net/api/read'
  92.  
  93. cache_id = md5.new(tumblr_api).hexdigest()
  94.  
  95. # Neu da co cache, thi tra lai cache
  96.  
  97. try:
  98.  
  99. cache = TumblrCache.gql("WHERE cache_id = :1 LIMIT 1", cache_id)[0]
  100.  
  101. except:
  102.  
  103. cache = TumblrCache(cache_id=cache_id)
  104.  
  105.  
  106.  
  107. content = cache.content
  108.  
  109. if content == None:
  110.  
  111. content = ''
  112.  
  113.  
  114.  
  115. d = (((cache.date.toordinal()-719163)*24+cache.date.hour)*60+cache.date.minute)*60+cache.date.second
  116.  
  117. d = time.time() - d
  118.  
  119.  
  120.  
  121. if d > 24*60*60 or len(content) < 5:
  122.  
  123. # Da qua 24h cache trong he thong
  124.  
  125. form_fields = {
  126.  
  127. "filter": "text"
  128.  
  129. }
  130.  
  131.  
  132.  
  133. loop = True
  134.  
  135. start = 0
  136.  
  137. taglist = {}
  138.  
  139.  
  140.  
  141.  
  142.  
  143.  
  144.  
  145. try:
  146.  
  147. while loop == True:
  148.  
  149. form_fields['start'] = int(start) * 20
  150.  
  151. form_data = urllib.urlencode(form_fields)
  152.  
  153. result = urlfetch.fetch(tumblr_api + '?filter=text&start=' + str(form_fields['start']))
  154.  
  155. result = result.content #.encode('utf-8')
  156.  
  157.  
  158.  
  159. xdom = minidom.parseString(result)
  160.  
  161. try:
  162.  
  163. posts = xdom.firstChild.getElementsByTagName("posts")[0].getElementsByTagName("post")
  164.  
  165. for post in posts:
  166.  
  167. post_id = post.getAttribute("id")
  168.  
  169. tags = post.getElementsByTagName("tag")
  170.  
  171. try:
  172.  
  173. for tag in tags:
  174.  
  175. # Duyet qua tags
  176.  
  177. t = "" . join(t.nodeValue for t in tag.childNodes if t.nodeType == t.TEXT_NODE)
  178.  
  179. t = t.strip()
  180.  
  181. #w.write(t)
  182.  
  183. if t not in taglist:
  184.  
  185. taglist[t] = 1
  186.  
  187. else:
  188.  
  189. taglist[t] = taglist[t] + 1
  190.  
  191. except:
  192.  
  193. # Loi & khong tim thay tags nao
  194.  
  195. i = 0
  196.  
  197. except:
  198.  
  199. # Loi & khong tim thay posts nao
  200.  
  201. i = 0
  202.  
  203.  
  204.  
  205. # kiem tra xem co thoat vong lap hay chua
  206.  
  207. try:
  208.  
  209. posts = xdom.firstChild.getElementsByTagName("posts")[0].getAttribute("total")
  210.  
  211. posts = int(math.ceil(float(posts) / float(20)) - 1)
  212.  
  213. if start + 1 > posts:
  214.  
  215. loop = False
  216.  
  217. else:
  218.  
  219. start = start + 1
  220.  
  221. loop = True
  222.  
  223. except:
  224.  
  225. loop = False
  226.  
  227. except:
  228.  
  229. loop = False
  230.  
  231.  
  232.  
  233. # Viet tags
  234.  
  235. txt = json.JSONEncoder().encode( taglist )
  236.  
  237.  
  238.  
  239. cache.content = txt
  240.  
  241. cache.date = datetime.datetime.today()
  242.  
  243. cache.put()
  244.  
  245.  
  246.  
  247. w.write(taglist)
  248.  
  249. else:
  250.  
  251. # Chua qua 24h cache trong he thong
  252.  
  253. # Tra ve cache data
  254.  
  255. content = cache.content
  256.  
  257. w.write( "var myJSONObject = " + content + ";" )
  258.  
  259.  
  260.  
  261. application = webapp.WSGIApplication(
  262.  
  263. [('/', MainPage),
  264.  
  265. ('/tagcloud', TagCloud),
  266.  
  267. ('/flush', Flush)],
  268.  
  269. debug=True)
  270.  
  271.  
  272.  
  273.  
  274.  
  275.  
  276.  
  277. def main():
  278.  
  279. run_wsgi_app(application)
  280.  
  281.  
  282.  
  283. if __name__ == "__main__":
  284.  
  285. main()

URL: http://www.doquangtu.net

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.