Revision: 49306
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at July 19, 2011 18:01 by magicrebirth
Initial Code
""" MoinMoin - Python Source Parser """
import cgi, sys, cStringIO
import keyword, token, tokenize
# Python Source Parser (does highlighting into HTML)

# Pseudo token types for the color table. They are offset from
# token.NT_OFFSET so they do not reuse the code of a real tokenizer type.
_KEYWORD = token.NT_OFFSET + 1
_TEXT = token.NT_OFFSET + 2

# Token type -> HTML color. Types not listed here are drawn with the
# _TEXT color (see the lookup in Parser.format).
_colors = {
    token.NUMBER: '#0080C0',
    token.OP: '#0000C0',
    token.STRING: '#004080',
    tokenize.COMMENT: '#008000',
    token.NAME: '#000000',
    token.ERRORTOKEN: '#FF8080',
    _KEYWORD: '#C00000',
    _TEXT: '#000000',
}


def _escape_html(text):
    """Return *text* with '&', '<' and '>' replaced by HTML entities."""
    # '&' has to be handled first, otherwise the entities produced by the
    # other two replacements would be escaped a second time.
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;'))


class Parser(object):
    """ Send colorized Python source HTML to output file (normally stdout).

    Attributes:
        raw: the source text, tabs expanded and surrounding whitespace
            stripped.
        out: object with a ``write`` method that receives the HTML.
        lines: character offset in ``raw`` of the start of each source row
            (1-based; filled in by ``format``).
        pos: offset in ``raw`` just past the last token handled by
            ``format``.
    """

    def __init__(self, raw, out=sys.stdout):
        """ Store the source text. """
        self.raw = raw.expandtabs().strip()
        self.out = out

    def format(self):
        """ Parse and send the colorized source to output.

        Writes a ``<pre>`` block to ``self.out`` in which every token is
        wrapped in a ``<font>`` element colored according to ``_colors``.
        If the tokenizer raises ``tokenize.TokenError`` the error message
        and the remaining source are written (HTML-escaped) instead of
        propagating the exception.
        """
        raw = self.raw
        write = self.out.write

        # Store line offsets in self.lines. Index 0 is a dummy entry so
        # that the tokenizer's 1-based row numbers index the list directly;
        # the final entry is the end of the text.
        self.lines = [0, 0]
        pos = 0
        while True:
            pos = raw.find('\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(raw))

        # Feed the tokenizer one line at a time, sliced with the offsets
        # computed above so rows always agree with self.lines. An empty
        # string signals end of input.
        starts = self.lines[1:]
        pending = iter([raw[a:b] for a, b in zip(starts, starts[1:])])

        def next_line():
            return next(pending, '')

        # Parse the source and write it
        self.pos = 0
        write('<pre><font face="Lucida, Courier New">')
        try:
            # The loop variables deliberately avoid the name "token": the
            # token module is used inside the loop body.
            for toktype, toktext, (srow, scol), _end, _line in \
                    tokenize.generate_tokens(next_line):
                # Calculate new positions
                oldpos = self.pos
                newpos = self.lines[srow] + scol
                self.pos = newpos + len(toktext)

                # Handle newlines
                if toktype in (token.NEWLINE, tokenize.NL):
                    write('\n')
                    continue

                # Send the original whitespace, if needed
                if newpos > oldpos:
                    write(raw[oldpos:newpos])

                # Skip indenting tokens, since they're whitespace-only.
                # Rewinding pos lets the next token emit that whitespace.
                if toktype in (token.INDENT, token.DEDENT):
                    self.pos = newpos
                    continue

                # Map token type to a color group
                if token.LPAR <= toktype <= token.OP:
                    toktype = token.OP
                elif toktype == token.NAME and keyword.iskeyword(toktext):
                    toktype = _KEYWORD
                color = _colors.get(toktype, _colors[_TEXT])

                style = ''
                if toktype == token.ERRORTOKEN:
                    style = ' style="border: solid 1.5pt #FF0000;"'

                # Send text
                write('<font color="%s"%s>' % (color, style))
                write(_escape_html(toktext))
                write('</font>')
        except tokenize.TokenError as ex:
            msg = ex.args[0]
            # Clamp the reported row so an unexpected value cannot raise
            # IndexError while we are already reporting an error.
            row = min(max(ex.args[1][0], 0), len(self.lines) - 1)
            # Both pieces come from the input being rendered, so they are
            # escaped before being embedded in the page.
            write("<h3>ERROR: %s</h3>%s\n" % (
                _escape_html(str(msg)), _escape_html(raw[self.lines[row]:])))
        write('</font></pre>')
if __name__ == "__main__":
    # Demo: colorize the file 'python.py' from the current directory into
    # 'python.html' and show the result in the default web browser.
    import webbrowser

    print("Formatting...")
    # Open own source
    with open('python.py') as source_file:
        source = source_file.read()
    # Write colorized version to "python.html"; the with-block makes sure
    # the file is flushed and closed before the browser tries to read it.
    with open('python.html', 'wt') as html_file:
        Parser(source, html_file).format()
    # Load HTML page into browser
    webbrowser.open("python.html")
Initial URL
http://book.opensourceproject.org.cn/lamp/python/pythoncook2/opensource/0596007973/pythoncook2-chp-16-sect-6.html
Initial Description
You need to convert Python source code into HTML markup, rendering comments, keywords, operators, and numeric and string literals in different colors. tokenize.generate_tokens does most of the work: we just need to loop over the tokens it yields and output each one with the appropriate color.
Initial Title
Colorizing Python Source Using the Built-in Tokenizer
Initial Tags
html
Initial Language
Python