Revision: 17583
Updated Code
at September 10, 2009 11:54 by manatlan
Updated Code
from htmlentitydefs import name2codepoint as n2cp
import re
def substitute_entity(match):
    """Return the replacement text for one matched HTML entity.

    ``match`` is expected to carry three groups, as produced by the
    pattern in ``decode_htmlentities``: group 1 is ``'#'`` for a numeric
    reference (empty otherwise), group 2 is an optional ``'x'`` and
    group 3 holds the remaining word characters.

    Returns the decoded unicode character, or the matched text unchanged
    when the entity name is unknown or the numeric reference is malformed.
    """
    hex_marker = match.group(2)
    ent = match.group(3)

    if match.group(1) != "#":
        # For named entities the optional 'x' group swallows the first
        # letter of names such as "&xi;" ('x' + 'i'); join both groups so
        # the full name is looked up (and "&xlt;" is not read as "lt").
        cp = n2cp.get(hex_marker + ent)
        if cp is None:
            return match.group()
        return unichr(cp)

    try:
        if hex_marker == 'x':
            return unichr(int('0x' + ent, 16))
        return unichr(int(ent))
    except (ValueError, OverflowError):
        # Digits that do not fit the announced base ("&#abc;", "&#xZZ;")
        # or a code point unichr() rejects: keep the source text intact
        # instead of aborting the whole substitution.
        return match.group()
def decode_htmlentities(string):
    """Return ``string`` with its HTML entities replaced by characters.

    Every ``&...;`` sequence matched by the pattern below is handed to
    ``substitute_entity``, whose return value replaces it.
    """
    # Three groups, read by substitute_entity(): optional '#', optional
    # 'x', then the remaining word characters.
    entity_re = re.compile(r'&(#?)(x?)(\w+);')
    # sub() returns the new string directly; the replacement count that
    # subn() also reports was discarded anyway.
    return entity_re.sub(substitute_entity, string)
Revision: 17582
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at September 10, 2009 03:11 by manatlan
Initial Code
from htmlentitydefs import name2codepoint as n2cp
import re
def substitute_entity(match):
    """Return the replacement text for one matched HTML entity.

    ``match`` is expected to carry two groups, as produced by the pattern
    in ``decode_htmlentities``: group 1 is ``'#'`` for a numeric reference
    (empty otherwise) and group 2 is the entity body.

    Returns the decoded unicode character, or the matched text unchanged
    when the entity name is unknown or the numeric reference is malformed.
    """
    ent = match.group(2)

    if match.group(1) != "#":
        cp = n2cp.get(ent)
        if cp:
            return unichr(cp)
        return match.group()

    # The pattern lets any word characters follow '#', so the body may be
    # decimal ("39"), hexadecimal ("x27") or garbage ("abc").
    try:
        if ent[:1] in ("x", "X"):
            return unichr(int(ent[1:], 16))
        return unichr(int(ent))
    except (ValueError, OverflowError):
        # Not a valid number, or a code point unichr() rejects: keep the
        # source text intact instead of aborting the whole substitution.
        return match.group()
def decode_htmlentities(string):
    """Return ``string`` with its HTML entities replaced by characters.

    Every ``&...;`` sequence matched by the pattern below is handed to
    ``substitute_entity``, whose return value replaces it.
    """
    # Raw string: "\d" and "\w" are regex escapes, not string escapes, and
    # a plain literal triggers invalid-escape warnings on newer Pythons.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")
    # sub() returns the new string directly; the replacement count that
    # subn() also reports was discarded anyway.
    return entity_re.sub(substitute_entity, string)
Initial URL
Initial Description
Example: decode_htmlentities("l&#39;eau") returns u"l'eau"
Initial Title
decode html entities
Initial Tags
html, python, text
Initial Language
Python