#!/usr/bin/python
"""Replace decimal character references in HTML or XML with UTF-8.

Badly named script, roughly the opposite of utf8_to_ascii.py: every
decimal entity such as ``&#233;`` found in the files named on the command
line (or on standard input when no files are given) is replaced by the
UTF-8 encoding of that code point, and the result is written to standard
output.

Only decimal references are matched; hexadecimal (``&#xE9;``) and named
(``&eacute;``) references are left as they are, as is any decimal
reference whose number cannot be encoded.  Input is handled as raw bytes,
so everything outside the matched references passes through unchanged.

Intended to run on both Python 2 (2.6+) and Python 3, but not exhaustively
tested across versions.
"""

import fileinput
import re
import sys

# Bytes pattern: the input is processed as bytes, never decoded.
entity = re.compile(br'&#([0-9]+);')

try:
    _chr = unichr  # Python 2: the builtin chr() only covers 0-255 there.
except NameError:
    _chr = chr  # Python 3: chr() already returns a text character.


def _entity_to_utf8(match):
    """Return the UTF-8 bytes for one matched decimal reference.

    The reference is returned untouched when its number cannot be turned
    into UTF-8, so a single bad reference does not abort the whole run.
    """
    try:
        return _chr(int(match.group(1))).encode('utf-8')
    except (ValueError, OverflowError):
        # ValueError: number outside the range the interpreter accepts, or
        # a code point that refuses to encode (UnicodeEncodeError is a
        # ValueError subclass).  OverflowError: absurdly large numbers.
        return match.group(0)


def decode_entities(data):
    """Return the byte string *data* with decimal references expanded.

    Args:
        data: a byte string, e.g. one line of an HTML or XML file.

    Returns:
        A byte string in which each ``&#NNN;`` has been replaced by the
        UTF-8 encoding of code point NNN; all other bytes are unchanged.
    """
    return entity.sub(_entity_to_utf8, data)


def main():
    """Filter the files named on the command line (or stdin) to stdout."""
    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout itself accepts byte strings.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    # mode='rb' keeps the data as bytes on both Python 2 and 3, matching
    # the byte-transparent behaviour of the original script.
    for line in fileinput.input(mode='rb'):
        out.write(decode_entities(line))


if __name__ == '__main__':
    main()