#!/usr/bin/perl
# Badly named script. Converts non-ASCII UTF-8 text into equivalent decimal
# entities for use in HTML and XML.
#
# Input is read from the files named on the command line, or from STDIN when
# no files are given. The converted text is written to STDOUT.
#
# History: earlier revisions relied on `use encoding 'utf8'` and gave wrong
# answers on some computers while working on others. That pragma has been
# deprecated since perl 5.18 and is rejected outright by perl 5.26 and later,
# so the input is now decoded explicitly with Encode instead of depending on
# it. The python and C versions are both faster anyway.

use strict;
use warnings;
use Encode qw(decode);

# Replace every non-ASCII character of an already-decoded string with its
# decimal numeric character reference (e.g. U+00E9 becomes "&#233;").
# ASCII characters, including '&', '<' and '>', pass through unchanged.
#
#   $text   - character string to convert (not modified)
#   returns - the converted string, containing ASCII characters only
sub to_decimal_entities {
    my ($text) = @_;

    # [:ascii:] covers code points 0-127; everything else is rewritten.
    $text =~ s/([^[:ascii:]])/sprintf('&#%d;', ord($1))/eg;

    return $text;
}

# Filter loop: read raw lines, decode them as UTF-8, emit the converted text.
sub main {
    while ( my $raw_line = <> ) {

        # Splitting on "\n" is safe before decoding: the byte 0x0A never
        # occurs inside a multi-byte UTF-8 sequence. With Encode's default
        # CHECK, malformed input becomes U+FFFD and is emitted as "&#65533;".
        my $decoded_line = decode( 'UTF-8', $raw_line );

        # The result is pure ASCII, so STDOUT needs no encoding layer.
        print to_decimal_entities($decoded_line)
            or die "Cannot write to STDOUT: $!\n";
    }
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (require/do) to reuse to_decimal_entities without consuming STDIN.
main() unless caller;