expanding character entity references in javascript

Discussion in 'Javascript' started by Jim Higson, Mar 12, 2005.

  1. Jim Higson

    Jim Higson Guest

    Does anyone know a technique in javascript to transform from (for example)
    &hearts; to the char '♥'?

    I'm doing this because I have to interpret some data I got over
    XMLHttpRequest that isn't XML, but might contain some XML char entities.

    Thanks,
    Jim
    Jim Higson, Mar 12, 2005
    #1
    1. Advertising

  2. Jim Higson

    Jim Higson Guest

    Jim Higson wrote:

    > Does anyone know a technique in javascript to transform from (for example)
    > &hearts; to the char '♥'?
    >
    > I'm doing this because I have to interpret some data I got over XHTMLHTTP
    > that isn't XML, but might contain some XML char entities.


    Btw, I'm using XHTML so setting innerHTML on a temp element and then reading
    the contents isn't really an option. Under moz this gives an error because
    innerHTML is read only.

    Jim
    Jim Higson, Mar 12, 2005
    #2
    1. Advertising

  3. > Does anyone know a technique in javascript to transform from (for example)
    > &hearts; to the char '♥'?
    >
    > I'm doing this because I have to interpret some data I got over XHTMLHTTP
    > that isn't XML, but might contain some XML char entities.


    /**
     * Replaces named character entity references (for example "&lt;") in this
     * string with the replacement text listed in
     * String.prototype.deentityify.data.
     *
     * The string is scanned from right to left and each "&name;" is looked at
     * exactly once:
     *  - a stray "&" or an unknown name is left in place and scanning continues
     *    (it no longer aborts the whole conversion);
     *  - replacement text is never re-scanned, so "&amp;lt;" yields "&lt;"
     *    rather than being decoded a second time.
     *
     * @param {*} o Ignored. Kept only so the function's signature is unchanged.
     * @returns {string} A primitive string with every known reference expanded.
     */
    String.prototype.deentityify = function (o) {
        var table = String.prototype.deentityify.data,
            hasOwn = Object.prototype.hasOwnProperty,
            s = String(this), // always work on (and return) a primitive string
            i = s.length,
            j,
            name;
        // i is the position of the last "&" examined; everything at or to the
        // right of it is finished, so only search strictly to its left.
        while (i > 0) {
            i = s.lastIndexOf('&', i - 1);
            if (i < 0) {
                break;
            }
            j = s.indexOf(';', i);
            if (j > i + 1) {
                name = s.substring(i + 1, j);
                // Own-property test: ignores inherited keys such as
                // "constructor" and allows an empty-string replacement.
                if (hasOwn.call(table, name)) {
                    s = s.substring(0, i) + table[name] + s.substring(j + 1);
                }
            }
        }
        return s;
    };

    // Entity name -> replacement text. These are the five entities predefined
    // by XML; add further names here as needed.
    String.prototype.deentityify.data = {
        amp: '&',
        apos: "'",
        gt: '>',
        lt: '<',
        quot: '"'
    };

    // Example: s is "<cool>".
    var s = "&lt;cool&gt;".deentityify();

    http://www.JSLint.com
    Douglas Crockford, Mar 12, 2005
    #3
  4. Jim Higson

    Jim Higson Guest

    Douglas Crockford wrote:

    >> Does anyone know a technique in javascript to transform from (for
    >> example) &hearts; to the char '♥'?
    >>
    >> I'm doing this because I have to interpret some data I got over XHTMLHTTP
    >> that isn't XML, but might contain some XML char entities.

    >
    > String.prototype.deentityify = function (o) {
    > var i, j, s = this, o = String.prototype.deentityify.data, v;
    > for (;;) {
    > i = s.lastIndexOf('&');
    > if (i < 0) {
    > break;
    > }
    > j = s.indexOf(';', i);
    > if (i + 1 >= j) {
    > break;
    > }
    > v = o[s.substring(i + 1, j)];
    > if (!v) {
    > break;
    > }
    > s = s.substring(0, i) + v + s.substring(j + 1);
    > }
    > return s;
    > }
    >
    > String.prototype.deentityify.data = {
    > apos: "'",
    > lt: '<',
    > gt: '>'};
    >
    > var s = "&lt;cool&gt;".deentityify();
    >
    > http://www.JSLint.com



    Thanks for the response, but I needed something that can handle any char
    reference. So I built up the hash with some js code generated from a quick
    Perl script taking values from the W3C's XHTML DTD. Add a bit of regex and
    I had it done straight away before I saw your response. Once I'd resigned
    myself to coding this in javascript (instead of tricking the browser into
    decoding the references) it was actually pretty easy.

    I'll post the output in case anyone wants to do this (it's about 3k, or 1k
    when served with gzip content-encoding)


    // Matches one character reference. Group 1 is the part between "&" and ";":
    //   #x2665 / #X2665  hexadecimal numeric reference
    //   #9829            decimal numeric reference
    //   hearts           named entity reference
    var CHAR_REF_REGEX = /&(#[xX][0-9a-fA-F]+|#[0-9]+|\w+);/g;

    /**
     * Expands character references in str into the characters they denote.
     *
     * Numeric references (decimal and hexadecimal) are converted directly.
     * Named references are looked up in the global CHAR_ENTITIES table
     * (entity name -> character code): first with the exact spelling, then
     * lower-cased, so a table keyed only in lower case still resolves
     * "&LT;" while a case-sensitive table can tell "&Agrave;" from "&agrave;".
     *
     * Anything that cannot be resolved (unknown name, code 0, a surrogate code,
     * or a code above U+10FFFF) is left in the output unchanged.
     *
     * @param {string} str Text that may contain character references.
     * @returns {string} str with every resolvable reference expanded.
     */
    function expand_char_references( str )
    {
        var has_own = Object.prototype.hasOwnProperty;

        // Returns the character code for the text between "&" and ";",
        // or null when it cannot be resolved.
        function resolve_code( ref )
        {
            var code, lower;

            if( ref.charAt( 0 ) === '#' )
            {
                var is_hex = ref.charAt( 1 ) === 'x' || ref.charAt( 1 ) === 'X';
                code = is_hex ? parseInt( ref.substring( 2 ), 16 )
                              : parseInt( ref.substring( 1 ), 10 );

                // Reject NUL, lone surrogates and values outside Unicode.
                if( !( code > 0 && code <= 0x10FFFF ) ||
                    ( code >= 0xD800 && code <= 0xDFFF ) )
                    return null;
                return code;
            }

            // Own-property checks keep inherited keys ("constructor", ...) out.
            if( has_own.call( CHAR_ENTITIES, ref ) )
                return CHAR_ENTITIES[ ref ];

            lower = ref.toLowerCase();
            if( has_own.call( CHAR_ENTITIES, lower ) )
                return CHAR_ENTITIES[ lower ];

            return null;
        }

        // String.fromCharCode only takes 16-bit units, so codes above U+FFFF
        // are emitted as a UTF-16 surrogate pair.
        function code_to_string( code )
        {
            if( code <= 0xFFFF )
                return String.fromCharCode( code );

            code -= 0x10000;
            return String.fromCharCode( 0xD800 + ( code >> 10 ),
                                        0xDC00 + ( code & 0x3FF ) );
        }

        // replace() restarts a global regex from index 0 on every call, so no
        // lastIndex state leaks between invocations.
        return str.replace( CHAR_REF_REGEX, function( whole, ref )
        {
            var code = resolve_code( ref );
            return code === null ? whole : code_to_string( code );
        } );
    }
    // Entity name -> character code for the XHTML 1.0 entity sets (Latin-1,
    // Special and Symbols), plus XML's "apos".
    //
    // Entity names are case-sensitive: "Agrave" (192) and "agrave" (224) are
    // different characters, as are "dagger"/"Dagger", "prime"/"Prime",
    // "larr"/"lArr", "yuml"/"Yuml" and the Greek letters. Lower-casing every
    // key would create duplicate keys where the later value silently wins.
    var CHAR_ENTITIES = {
        // Latin-1
        'nbsp':160,'iexcl':161,'cent':162,'pound':163,'curren':164,'yen':165,
        'brvbar':166,'sect':167,'uml':168,'copy':169,'ordf':170,'laquo':171,
        'not':172,'shy':173,'reg':174,'macr':175,'deg':176,'plusmn':177,
        'sup2':178,'sup3':179,'acute':180,'micro':181,'para':182,'middot':183,
        'cedil':184,'sup1':185,'ordm':186,'raquo':187,'frac14':188,'frac12':189,
        'frac34':190,'iquest':191,
        'Agrave':192,'Aacute':193,'Acirc':194,'Atilde':195,'Auml':196,'Aring':197,
        'AElig':198,'Ccedil':199,'Egrave':200,'Eacute':201,'Ecirc':202,'Euml':203,
        'Igrave':204,'Iacute':205,'Icirc':206,'Iuml':207,'ETH':208,'Ntilde':209,
        'Ograve':210,'Oacute':211,'Ocirc':212,'Otilde':213,'Ouml':214,'times':215,
        'Oslash':216,'Ugrave':217,'Uacute':218,'Ucirc':219,'Uuml':220,'Yacute':221,
        'THORN':222,'szlig':223,
        'agrave':224,'aacute':225,'acirc':226,'atilde':227,'auml':228,'aring':229,
        'aelig':230,'ccedil':231,'egrave':232,'eacute':233,'ecirc':234,'euml':235,
        'igrave':236,'iacute':237,'icirc':238,'iuml':239,'eth':240,'ntilde':241,
        'ograve':242,'oacute':243,'ocirc':244,'otilde':245,'ouml':246,'divide':247,
        'oslash':248,'ugrave':249,'uacute':250,'ucirc':251,'uuml':252,'yacute':253,
        'thorn':254,'yuml':255,

        // Special ("lt" is 60; 38 is the ampersand)
        'quot':34,'amp':38,'apos':39,'lt':60,'gt':62,
        'OElig':338,'oelig':339,'Scaron':352,'scaron':353,'Yuml':376,
        'circ':710,'tilde':732,
        'ensp':8194,'emsp':8195,'thinsp':8201,'zwnj':8204,'zwj':8205,
        'lrm':8206,'rlm':8207,'ndash':8211,'mdash':8212,
        'lsquo':8216,'rsquo':8217,'sbquo':8218,'ldquo':8220,'rdquo':8221,'bdquo':8222,
        'dagger':8224,'Dagger':8225,'permil':8240,'lsaquo':8249,'rsaquo':8250,
        'euro':8364,

        // Symbols and Greek
        'fnof':402,
        'Alpha':913,'Beta':914,'Gamma':915,'Delta':916,'Epsilon':917,'Zeta':918,
        'Eta':919,'Theta':920,'Iota':921,'Kappa':922,'Lambda':923,'Mu':924,
        'Nu':925,'Xi':926,'Omicron':927,'Pi':928,'Rho':929,'Sigma':931,
        'Tau':932,'Upsilon':933,'Phi':934,'Chi':935,'Psi':936,'Omega':937,
        'alpha':945,'beta':946,'gamma':947,'delta':948,'epsilon':949,'zeta':950,
        'eta':951,'theta':952,'iota':953,'kappa':954,'lambda':955,'mu':956,
        'nu':957,'xi':958,'omicron':959,'pi':960,'rho':961,'sigmaf':962,
        'sigma':963,'tau':964,'upsilon':965,'phi':966,'chi':967,'psi':968,
        'omega':969,'thetasym':977,'upsih':978,'piv':982,
        'bull':8226,'hellip':8230,'prime':8242,'Prime':8243,'oline':8254,'frasl':8260,
        'weierp':8472,'image':8465,'real':8476,'trade':8482,'alefsym':8501,
        'larr':8592,'uarr':8593,'rarr':8594,'darr':8595,'harr':8596,'crarr':8629,
        'lArr':8656,'uArr':8657,'rArr':8658,'dArr':8659,'hArr':8660,
        'forall':8704,'part':8706,'exist':8707,'empty':8709,'nabla':8711,
        'isin':8712,'notin':8713,'ni':8715,'prod':8719,'sum':8721,'minus':8722,
        'lowast':8727,'radic':8730,'prop':8733,'infin':8734,'ang':8736,
        'and':8743,'or':8744,'cap':8745,'cup':8746,'int':8747,'there4':8756,
        'sim':8764,'cong':8773,'asymp':8776,'ne':8800,'equiv':8801,'le':8804,
        'ge':8805,'sub':8834,'sup':8835,'nsub':8836,'sube':8838,'supe':8839,
        'oplus':8853,'otimes':8855,'perp':8869,'sdot':8901,
        'lceil':8968,'rceil':8969,'lfloor':8970,'rfloor':8971,
        'lang':9001,'rang':9002,'loz':9674,
        'spades':9824,'clubs':9827,'hearts':9829,'diams':9830
    };
    Jim Higson, Mar 13, 2005
    #4
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Replies:
    2
    Views:
    479
  2. Alia Khouri
    Replies:
    2
    Views:
    274
    Alia Khouri
    Jul 23, 2007
  3. markla
    Replies:
    1
    Views:
    540
    Steven Cheng
    Oct 6, 2008
  4. C Davis
    Replies:
    2
    Views:
    414
    Andy Dingley
    Feb 3, 2009
  5. User Axes Dean Eyed

    Character Entity References & DOM

    User Axes Dean Eyed, Oct 6, 2004, in forum: Javascript
    Replies:
    1
    Views:
    152
    Michael Winter
    Oct 7, 2004
Loading...

Share This Page