expanding character entity references in javascript

J

Jim Higson

Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.

Thanks,
Jim
 
J

Jim Higson

Jim said:
Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.

Btw, I'm using XHTML so setting innerHTML on a temp element and then reading
the contents isn't really an option. Under moz this gives an error because
innerHTML is read only.

Jim
 
D

Douglas Crockford

Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.

/**
 * Expand named character entity references (such as "&lt;") in this string.
 *
 * Every "&name;" sequence whose name is an own property of the lookup table
 * is replaced by the mapped text; any other reference is left untouched and
 * does not stop the remaining references from being expanded. The string is
 * scanned once, left to right, so replacement text is never re-scanned
 * ("&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {Object} [o] Optional name -> replacement table. Any non-object
 *     argument is ignored and String.prototype.deentityify.data is used.
 * @returns {string} A primitive string with the known references expanded.
 */
String.prototype.deentityify = function (o) {
    var table = (o !== null && typeof o === 'object')
        ? o
        : String.prototype.deentityify.data;
    var owns = Object.prototype.hasOwnProperty;

    // String(this) guarantees a primitive result: in non-strict code `this`
    // is a String wrapper object, which must not leak out to the caller.
    return String(this).replace(/&([^&;]+);/g, function (reference, name) {
        // Own-property check so inherited names such as "constructor"
        // are not mistaken for entities.
        return owns.call(table, name) ? table[name] : reference;
    });
};

// Default table: the five entities predefined by XML.
String.prototype.deentityify.data = {
    amp: '&',
    apos: "'",
    gt: '>',
    lt: '<',
    quot: '"'
};

// Example: s is "<cool>".
var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com
 
J

Jim Higson

Douglas said:
Does anyone know a technique in javascript to transform from (for
example) &hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.

/**
 * Expand named character entity references (such as "&lt;") in this string.
 *
 * Every "&name;" sequence whose name is an own property of the lookup table
 * is replaced by the mapped text; any other reference is left untouched and
 * does not stop the remaining references from being expanded. The string is
 * scanned once, left to right, so replacement text is never re-scanned
 * ("&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {Object} [o] Optional name -> replacement table. Any non-object
 *     argument is ignored and String.prototype.deentityify.data is used.
 * @returns {string} A primitive string with the known references expanded.
 */
String.prototype.deentityify = function (o) {
    var table = (o !== null && typeof o === 'object')
        ? o
        : String.prototype.deentityify.data;
    var owns = Object.prototype.hasOwnProperty;

    // String(this) guarantees a primitive result: in non-strict code `this`
    // is a String wrapper object, which must not leak out to the caller.
    return String(this).replace(/&([^&;]+);/g, function (reference, name) {
        // Own-property check so inherited names such as "constructor"
        // are not mistaken for entities.
        return owns.call(table, name) ? table[name] : reference;
    });
};

// Default table: the five entities predefined by XML.
String.prototype.deentityify.data = {
    amp: '&',
    apos: "'",
    gt: '>',
    lt: '<',
    quot: '"'
};

// Example: s is "<cool>".
var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com


Thanks for the response, but I needed something that can handle any char
reference. So I built up the hash with some JS code generated by a quick
Perl script taking values from the W3C's XHTML DTD. Add a bit of regex and
I had it done straight away, before I saw your response. Once I'd resigned
myself to coding this in javascript (instead of tricking the browser into
decoding the references) it was actually pretty easy.

I'll post the output in case anyone wants to do this (it's about 3k, or 1k
with content-type gzip)


/**
 * Matches one character reference. Capture group 1 is the part between
 * "&" and ";": a decimal reference ("#9829"), a hexadecimal reference
 * ("#x2665"), or an entity name ("hearts").
 */
var CHAR_REF_REGEX = /&(#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);/g;

/**
 * Resolve the inside of a character reference to a Unicode code point.
 *
 * @param {string} ref Text between "&" and ";" (e.g. "#9829", "#x2665", "hearts").
 * @returns {number} The code point, or -1 when the reference is unknown or
 *     lies beyond U+10FFFF.
 */
function resolve_char_reference( ref )
{
    var owns = Object.prototype.hasOwnProperty;
    var code, folded;

    if( ref.charAt( 0 ) === '#' )
    {
        code = /^#[xX]/.test( ref )
            ? parseInt( ref.substring( 2 ), 16 )
            : parseInt( ref.substring( 1 ), 10 );
        return code <= 0x10FFFF ? code : -1;
    }

    // Entity names are case-sensitive ("dagger" vs "Dagger"), so try the
    // exact spelling first.
    if( owns.call( CHAR_ENTITIES, ref ) )
    {
        return CHAR_ENTITIES[ ref ];
    }

    // Fall back to the lower-cased name so sloppy input such as "&HEARTS;"
    // keeps resolving, as it did when every lookup was lower-cased.
    folded = ref.toLowerCase();
    if( owns.call( CHAR_ENTITIES, folded ) )
    {
        return CHAR_ENTITIES[ folded ];
    }

    return -1;
}

/**
 * Convert a code point to a string, emitting a UTF-16 surrogate pair for
 * code points above U+FFFF (String.fromCharCode alone would truncate them).
 *
 * @param {number} code Code point in the range 0..0x10FFFF.
 * @returns {string} The corresponding one- or two-unit string.
 */
function code_point_to_string( code )
{
    if( code > 0xFFFF )
    {
        code -= 0x10000;
        return String.fromCharCode( 0xD800 + ( code >> 10 ),
                                    0xDC00 + ( code & 0x3FF ) );
    }
    return String.fromCharCode( code );
}

/**
 * Expand XHTML character references in a string.
 *
 * Handles decimal ("&#9829;") and hexadecimal ("&#x2665;") numeric
 * references and the named entities listed in CHAR_ENTITIES. References
 * that cannot be resolved are left in the output unchanged. The input is
 * scanned once, so expanded text is never expanded again.
 *
 * @param {string} str Text that may contain character references.
 * @returns {string} The text with every resolvable reference expanded.
 */
function expand_char_references( str )
{
    return str.replace( CHAR_REF_REGEX, function( whole, ref )
    {
        var code = resolve_char_reference( ref );
        return code < 0 ? whole : code_point_to_string( code );
    } );
}

/**
 * Entity name -> code point for the three XHTML 1.0 entity sets
 * (Latin-1, Special, Symbols). Names are case-sensitive.
 */
var CHAR_ENTITIES = {
    // Latin-1
    'nbsp':160,'iexcl':161,'cent':162,'pound':163,'curren':164,'yen':165,
    'brvbar':166,'sect':167,'uml':168,'copy':169,'ordf':170,'laquo':171,
    'not':172,'shy':173,'reg':174,'macr':175,'deg':176,'plusmn':177,
    'sup2':178,'sup3':179,'acute':180,'micro':181,'para':182,'middot':183,
    'cedil':184,'sup1':185,'ordm':186,'raquo':187,'frac14':188,'frac12':189,
    'frac34':190,'iquest':191,
    'Agrave':192,'Aacute':193,'Acirc':194,'Atilde':195,'Auml':196,'Aring':197,
    'AElig':198,'Ccedil':199,'Egrave':200,'Eacute':201,'Ecirc':202,'Euml':203,
    'Igrave':204,'Iacute':205,'Icirc':206,'Iuml':207,'ETH':208,'Ntilde':209,
    'Ograve':210,'Oacute':211,'Ocirc':212,'Otilde':213,'Ouml':214,'times':215,
    'Oslash':216,'Ugrave':217,'Uacute':218,'Ucirc':219,'Uuml':220,'Yacute':221,
    'THORN':222,'szlig':223,
    'agrave':224,'aacute':225,'acirc':226,'atilde':227,'auml':228,'aring':229,
    'aelig':230,'ccedil':231,'egrave':232,'eacute':233,'ecirc':234,'euml':235,
    'igrave':236,'iacute':237,'icirc':238,'iuml':239,'eth':240,'ntilde':241,
    'ograve':242,'oacute':243,'ocirc':244,'otilde':245,'ouml':246,'divide':247,
    'oslash':248,'ugrave':249,'uacute':250,'ucirc':251,'uuml':252,'yacute':253,
    'thorn':254,'yuml':255,

    // Special
    'quot':34,'amp':38,'lt':60,'gt':62,'apos':39,
    'OElig':338,'oelig':339,'Scaron':352,'scaron':353,'Yuml':376,
    'circ':710,'tilde':732,'ensp':8194,'emsp':8195,'thinsp':8201,
    'zwnj':8204,'zwj':8205,'lrm':8206,'rlm':8207,'ndash':8211,'mdash':8212,
    'lsquo':8216,'rsquo':8217,'sbquo':8218,'ldquo':8220,'rdquo':8221,
    'bdquo':8222,'dagger':8224,'Dagger':8225,'permil':8240,'lsaquo':8249,
    'rsaquo':8250,'euro':8364,

    // Symbols
    'fnof':402,
    'Alpha':913,'Beta':914,'Gamma':915,'Delta':916,'Epsilon':917,'Zeta':918,
    'Eta':919,'Theta':920,'Iota':921,'Kappa':922,'Lambda':923,'Mu':924,
    'Nu':925,'Xi':926,'Omicron':927,'Pi':928,'Rho':929,'Sigma':931,'Tau':932,
    'Upsilon':933,'Phi':934,'Chi':935,'Psi':936,'Omega':937,
    'alpha':945,'beta':946,'gamma':947,'delta':948,'epsilon':949,'zeta':950,
    'eta':951,'theta':952,'iota':953,'kappa':954,'lambda':955,'mu':956,
    'nu':957,'xi':958,'omicron':959,'pi':960,'rho':961,'sigmaf':962,
    'sigma':963,'tau':964,'upsilon':965,'phi':966,'chi':967,'psi':968,
    'omega':969,'thetasym':977,'upsih':978,'piv':982,
    'bull':8226,'hellip':8230,'prime':8242,'Prime':8243,'oline':8254,
    'frasl':8260,'weierp':8472,'image':8465,'real':8476,'trade':8482,
    'alefsym':8501,
    'larr':8592,'uarr':8593,'rarr':8594,'darr':8595,'harr':8596,'crarr':8629,
    'lArr':8656,'uArr':8657,'rArr':8658,'dArr':8659,'hArr':8660,
    'forall':8704,'part':8706,'exist':8707,'empty':8709,'nabla':8711,
    'isin':8712,'notin':8713,'ni':8715,'prod':8719,'sum':8721,'minus':8722,
    'lowast':8727,'radic':8730,'prop':8733,'infin':8734,'ang':8736,'and':8743,
    'or':8744,'cap':8745,'cup':8746,'int':8747,'there4':8756,'sim':8764,
    'cong':8773,'asymp':8776,'ne':8800,'equiv':8801,'le':8804,'ge':8805,
    'sub':8834,'sup':8835,'nsub':8836,'sube':8838,'supe':8839,'oplus':8853,
    'otimes':8855,'perp':8869,'sdot':8901,'lceil':8968,'rceil':8969,
    'lfloor':8970,'rfloor':8971,'lang':9001,'rang':9002,'loz':9674,
    'spades':9824,'clubs':9827,'hearts':9829,'diams':9830
};
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,763
Messages
2,569,563
Members
45,039
Latest member
CasimiraVa

Latest Threads

Top