Revision: 40737
Updated Code
at February 6, 2011 13:28 by cortesi
Updated Code
import re
TAG = r"""
<\s*
(?!\s*[!"])
(?P<close>\s*\/)?
(?P<name>\w+)
(
[^'"\t >]+
|
"[^\"]*"['\"]*
|
'[^']*'['\"]*
|
\s+
)*
(?P<selfcont>\s*\/\s*)?
\s*>
"""
UNI = set(["br", "hr", "img", "input", "area", "link"])
INDENT = " "*4
def pretty_xmlish(s):
"""
This is a robust, general indenter for XML-ish data.
Returns a list of lines.
"""
data, offset, indent, prev = [], 0, 0, None
for i in re.finditer(TAG, s, re.VERBOSE|re.MULTILINE):
start, end = i.span()
name = i.group("name")
if start > offset:
txt = []
for x in textwrap.dedent(s[offset:start]).split("\n"):
if x.strip():
txt.append(indent*INDENT + x)
data.extend(txt)
if i.group("close") and not (name in UNI and name==prev):
indent = max(indent - 1, 0)
data.append(indent*INDENT + i.group().strip())
offset = end
if not any([i.group("close"), i.group("selfcont"), name in UNI]):
indent += 1
prev = name
return data
Revision: 40736
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at February 6, 2011 13:27 by cortesi
Initial Code
import re
TAG = r"""
<\s*
(?!\s*[!"])
(?P<close>\s*\/)?
(?P<name>\w+)
(
[^'"\t >]+
|
"[^\"]*"['\"]*
|
'[^']*'['\"]*
|
\s+
)*
(?P<selfcont>\s*\/\s*)?
\s*>
"""
UNI = set(["br", "hr", "img", "input", "area", "link"])
INDENT = " "*4
def pretty_xmlish(s):
"""
This is a robust, general pretty-printer for XML-ish data.
Returns a list of lines.
"""
data, offset, indent, prev = [], 0, 0, None
for i in re.finditer(TAG, s, re.VERBOSE|re.MULTILINE):
start, end = i.span()
name = i.group("name")
if start > offset:
txt = []
for x in textwrap.dedent(s[offset:start]).split("\n"):
if x.strip():
txt.append(indent*INDENT + x)
data.extend(txt)
if i.group("close") and not (name in UNI and name==prev):
indent = max(indent - 1, 0)
data.append(indent*INDENT + i.group().strip())
offset = end
if not any([i.group("close"), i.group("selfcont"), name in UNI]):
indent += 1
prev = name
return data
Initial URL
Initial Description
Initial Title
Robust pretty-printing for XML-like data
Initial Tags
Initial Language
Python