Colorizing Python Source Using the Built-in Tokenizer


/ Published in: Python
Save to your folder(s)

You need to convert Python source code into HTML markup, rendering comments, keywords, operators, and numeric and string literals in different colors.
tokenize.generate_tokens does most of the work. We just need to loop over all tokens it finds, to output them with appropriate colorization:


Copy this code and paste it in your HTML
  1. """ MoinMoin - Python Source Parser """
  2. import cgi, sys, cStringIO
  3. import keyword, token, tokenize
  4. # Python Source Parser (does highlighting into HTML)
  5. _KEYWORD = token.NT_OFFSET + 1
  6. _TEXT = token.NT_OFFSET + 2
  7. _colors = {
  8. token.NUMBER: '#0080C0',
  9. token.OP: '#0000C0',
  10. token.STRING: '#004080',
  11. tokenize.COMMENT: '#008000',
  12. token.NAME: '#000000',
  13. token.ERRORTOKEN: '#FF8080',
  14. _KEYWORD: '#C00000',
  15. _TEXT: '#000000',
  16. }
  17. class Parser(object):
  18. """ Send colorized Python source HTML to output file (normally stdout).
  19. """
  20. def _ _init_ _(self, raw, out=sys.stdout):
  21. """ Store the source text. """
  22. self.raw = raw.expandtabs( ).strip( )
  23. self.out = out
  24. def format(self):
  25. """ Parse and send the colorized source to output. """
  26. # Store line offsets in self.lines
  27. self.lines = [0, 0]
  28. pos = 0
  29. while True:
  30. pos = self.raw.find('\n', pos) + 1
  31. if not pos: break
  32. self.lines.append(pos)
  33. self.lines.append(len(self.raw))
  34. # Parse the source and write it
  35. self.pos = 0
  36. text = cStringIO.StringIO(self.raw)
  37. self.out.write('<pre><font face="Lucida, Courier New">')
  38. try:
  39. for token in tokenize.generate_tokens(text.readline):
  40. # unpack the components of each token
  41. toktype, toktext, (srow, scol), (erow, ecol), line = token
  42. if False: # You may enable this for debugging purposes only
  43. print "type", toktype, token.tok_name[toktype],
  44. print "text", toktext,
  45. print "start", srow,scol, "end", erow,ecol, "<br>"
  46. # Calculate new positions
  47. oldpos = self.pos
  48. newpos = self.lines[srow] + scol
  49. self.pos = newpos + len(toktext)
  50. # Handle newlines
  51. if toktype in (token.NEWLINE, tokenize.NL):
  52. self.out.write('\n')
  53. continue
  54. # Send the original whitespace, if needed
  55. if newpos > oldpos:
  56. self.out.write(self.raw[oldpos:newpos])
  57. # Skip indenting tokens, since they're whitespace-only
  58. if toktype in (token.INDENT, token.DEDENT):
  59. self.pos = newpos
  60. continue
  61. # Map token type to a color group
  62. if token.LPAR <= toktype <= token.OP:
  63. toktype = token.OP
  64. elif toktype == token.NAME and keyword.iskeyword(toktext):
  65. toktype = _KEYWORD
  66. color = _colors.get(toktype, _colors[_TEXT])
  67. style = ''
  68. if toktype == token.ERRORTOKEN:
  69. style = ' style="border: solid 1.5pt #FF0000;"'
  70. # Send text
  71. self.out.write('<font color="%s"%s>' % (color, style))
  72. self.out.write(cgi.escape(toktext))
  73. self.out.write('</font>')
  74. except tokenize.TokenError, ex:
  75. msg = ex[0]
  76. line = ex[1][0]
  77. self.out.write("<h3>ERROR: %s</h3>%s\n" % (
  78. msg, self.raw[self.lines[line]:]))
  79. self.out.write('</font></pre>')
  80. if _ _name_ _ == "_ _main_ _":
  81. print "Formatting..."
  82. # Open own source
  83. source = open('python.py').read( )
  84. # Write colorized version to "python.html"
  85. Parser(source, open('python.html', 'wt')).format( )
  86. # Load HTML page into browser
  87. import webbrowser
  88. webbrowser.open("python.html")

URL: http://book.opensourceproject.org.cn/lamp/python/pythoncook2/opensource/0596007973/pythoncook2-chp-16-sect-6.html

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.