Convert native character string to ASCII array of integers

Discussion in 'C Programming' started by Tomás Ó hÉilidhe, Mar 28, 2008.

  1. Given a string in the computer's native character set such as:

    "Hello"

    , I want to convert it to an array of integers representing the ASCII
    values of the characters. The reason I want to do this is that I'll be
    passing the ASCII array to a cryptographic hash function. In order to
    make my program fully portable so that it will run properly on
    machines where the default character set isn't ASCII, I've created a
    MakeASCII function.

    Please critique my MakeASCII function! Rip it apart!

    typedef unsigned char OctetStorage;

    /*
     * MakeASCII - translate a native-character-set string into ASCII codes.
     *
     * pos  destination; receives one octet per source character followed
     *      by a terminating 0.
     * pc   null-terminated source string in the execution character set.
     *
     * Each of the 95 printable characters listed below is replaced by its
     * ASCII code (0x20..0x7E) whatever its native value is.  Any other
     * character is copied through unchanged.
     */
    void MakeASCII(OctetStorage *pos,char const *pc)
    {
        for (;;)
        {
            char const ch = *pc++;   /* read before the store below */
            OctetStorage octet;

            switch (ch)
            {
                case 0:
                    /* end of input: terminate the output and stop */
                    *pos = 0;
                    return;

                /* punctuation and digits, 0x20..0x40 */
                case ' ':  octet = 0x20; break;
                case '!':  octet = 0x21; break;
                case '"':  octet = 0x22; break;
                case '#':  octet = 0x23; break;
                case '$':  octet = 0x24; break;
                case '%':  octet = 0x25; break;
                case '&':  octet = 0x26; break;
                case '\'': octet = 0x27; break;
                case '(':  octet = 0x28; break;
                case ')':  octet = 0x29; break;
                case '*':  octet = 0x2A; break;
                case '+':  octet = 0x2B; break;
                case ',':  octet = 0x2C; break;
                case '-':  octet = 0x2D; break;
                case '.':  octet = 0x2E; break;
                case '/':  octet = 0x2F; break;
                case '0':  octet = 0x30; break;
                case '1':  octet = 0x31; break;
                case '2':  octet = 0x32; break;
                case '3':  octet = 0x33; break;
                case '4':  octet = 0x34; break;
                case '5':  octet = 0x35; break;
                case '6':  octet = 0x36; break;
                case '7':  octet = 0x37; break;
                case '8':  octet = 0x38; break;
                case '9':  octet = 0x39; break;
                case ':':  octet = 0x3A; break;
                case ';':  octet = 0x3B; break;
                case '<':  octet = 0x3C; break;
                case '=':  octet = 0x3D; break;
                case '>':  octet = 0x3E; break;
                case '?':  octet = 0x3F; break;
                case '@':  octet = 0x40; break;

                /* upper-case letters, 0x41..0x5A */
                case 'A':  octet = 0x41; break;
                case 'B':  octet = 0x42; break;
                case 'C':  octet = 0x43; break;
                case 'D':  octet = 0x44; break;
                case 'E':  octet = 0x45; break;
                case 'F':  octet = 0x46; break;
                case 'G':  octet = 0x47; break;
                case 'H':  octet = 0x48; break;
                case 'I':  octet = 0x49; break;
                case 'J':  octet = 0x4A; break;
                case 'K':  octet = 0x4B; break;
                case 'L':  octet = 0x4C; break;
                case 'M':  octet = 0x4D; break;
                case 'N':  octet = 0x4E; break;
                case 'O':  octet = 0x4F; break;
                case 'P':  octet = 0x50; break;
                case 'Q':  octet = 0x51; break;
                case 'R':  octet = 0x52; break;
                case 'S':  octet = 0x53; break;
                case 'T':  octet = 0x54; break;
                case 'U':  octet = 0x55; break;
                case 'V':  octet = 0x56; break;
                case 'W':  octet = 0x57; break;
                case 'X':  octet = 0x58; break;
                case 'Y':  octet = 0x59; break;
                case 'Z':  octet = 0x5A; break;

                /* punctuation, 0x5B..0x60 */
                case '[':  octet = 0x5B; break;
                case '\\': octet = 0x5C; break;
                case ']':  octet = 0x5D; break;
                case '^':  octet = 0x5E; break;
                case '_':  octet = 0x5F; break;
                case '`':  octet = 0x60; break;

                /* lower-case letters, 0x61..0x7A */
                case 'a':  octet = 0x61; break;
                case 'b':  octet = 0x62; break;
                case 'c':  octet = 0x63; break;
                case 'd':  octet = 0x64; break;
                case 'e':  octet = 0x65; break;
                case 'f':  octet = 0x66; break;
                case 'g':  octet = 0x67; break;
                case 'h':  octet = 0x68; break;
                case 'i':  octet = 0x69; break;
                case 'j':  octet = 0x6A; break;
                case 'k':  octet = 0x6B; break;
                case 'l':  octet = 0x6C; break;
                case 'm':  octet = 0x6D; break;
                case 'n':  octet = 0x6E; break;
                case 'o':  octet = 0x6F; break;
                case 'p':  octet = 0x70; break;
                case 'q':  octet = 0x71; break;
                case 'r':  octet = 0x72; break;
                case 's':  octet = 0x73; break;
                case 't':  octet = 0x74; break;
                case 'u':  octet = 0x75; break;
                case 'v':  octet = 0x76; break;
                case 'w':  octet = 0x77; break;
                case 'x':  octet = 0x78; break;
                case 'y':  octet = 0x79; break;
                case 'z':  octet = 0x7A; break;

                /* punctuation, 0x7B..0x7E */
                case '{':  octet = 0x7B; break;
                case '|':  octet = 0x7C; break;
                case '}':  octet = 0x7D; break;
                case '~':  octet = 0x7E; break;

                /* anything else is passed through with its native value */
                default:   octet = ch;   break;
            }

            *pos++ = octet;
        }
    }
    Tomás Ó hÉilidhe, Mar 28, 2008
    #1
    1. Advertising

  2. Tomás Ó hÉilidhe said:

    <snip>

    > Please my MakeASCII function! Rip it apart!


    Well, I won't rip it apart, but I think I can let a little air out of it.

    #include <string.h>

    /*
     * MakeASCII - translate a native-character-set string into ASCII codes.
     *
     * pos  destination; receives one octet per source character followed
     *      by a terminating 0.
     * pc   null-terminated source string.
     *
     * A character found in the table below is replaced by 32 plus its
     * position in the table, i.e. its ASCII code.  A character that is not
     * in the table is copied through unchanged.
     */
    void MakeASCII(unsigned char *pos,char const *pc)
    {
        /* The 95 printable characters, in ASCII order starting at code 32. */
        static const char printable[] =
            " !\"#$%&'()*+,-./0123456789:;<=>?@"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "[\\]^_`"
            "abcdefghijklmnopqrstuvwxyz"
            "{|}~";

        for ( ; *pc != '\0'; ++pc, ++pos)
        {
            const char *hit = strchr(printable, *pc);

            *pos = (hit == NULL) ? *pc : (hit - printable) + 32;
        }

        *pos = '\0';
    }

    If you hit performance issues with that one, consider this alternative:

    #include <string.h>
    #include <limits.h>

    /*
     * MakeASCII - translate a native-character-set string into ASCII codes
     * using a lookup table that is built on the first call.
     *
     * pos  destination; receives one octet per source character followed
     *      by a terminating 0.
     * pc   null-terminated source string.
     *
     * The table maps every unsigned char value to itself, except that the
     * 95 printable characters in bcs are mapped to 32, 33, ... in order,
     * i.e. to their ASCII codes.
     */
    void MakeASCII(unsigned char *pos,char const *pc)
    {
        const char *bcs =
            " !\"#$%&'()*+,-./0123456789:;<=>?@"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "[\\]^_`"
            "abcdefghijklmnopqrstuvwxyz"
            "{|}~";
        /* unsigned char so that every value up to UCHAR_MAX is representable */
        static unsigned char att[UCHAR_MAX + 1] = {0};

        const char *cur = bcs;
        int i = 0;

        /*
         * Table entries are indexed through unsigned char: plain char may
         * be signed, and a negative value must not be used as a subscript.
         */
        if(att[(unsigned char)' '] != 32) /* do we need to set up the array? */
        {
            /* defaults: every value maps to itself */
            for(i = 0; i < UCHAR_MAX + 1; i++)
            {
                att[i] = (unsigned char)i;
            }

            /* known ASCII characters */
            i = 32;
            while(*cur != '\0')
            {
                att[(unsigned char)*cur++] = (unsigned char)i++;
            }
        }

        /* att[0] is 0, so the source terminator is copied and ends the loop */
        while((*pos++ = att[(unsigned char)*pc++]) != 0)
        {
            continue;
        }
    }


    --
    Richard Heathfield <http://www.cpax.org.uk>
    Email: -http://www. +rjh@
    Google users: <http://www.cpax.org.uk/prg/writings/googly.php>
    "Usenet is a strange place" - dmr 29 July 1999
    Richard Heathfield, Mar 28, 2008
    #2
    1. Advertising

  3. Richard Heathfield:

    > #include <string.h>
    >
    > void MakeASCII(unsigned char *pos,char const *pc)
    > {
    >   const char *bcs =
    >     " !\"#$%&'()*+,-./0123456789:;<=>?@"
    >     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    >     "[\\]^_`"
    >     "abcdefghijklmnopqrstuvwxyz"
    >     "{|}~";
    >   const char *cur = NULL;
    >
    >   while(*pc != '\0')
    >   {
    >     cur = strchr(bcs, *pc);
    >     if(cur != NULL)
    >     {
    >       *pos++ = (cur - bcs) + 32;
    >     }
    >     else
    >     {
    >       *pos++ = *pc;
    >     }
    >     ++pc;
    >   }
    >   *pos = '\0';
    >
    > }
    >
    > If you hit performance issues with that one, consider this alternative:
    >
    > #include <string.h>
    > #include <limits.h>
    >
    > void MakeASCII(unsigned char *pos,char const *pc)
    > {
    >   const char *bcs =
    >     " !\"#$%&'()*+,-./0123456789:;<=>?@"
    >     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    >     "[\\]^_`"
    >     "abcdefghijklmnopqrstuvwxyz"
    >     "{|}~";
    >   static char att[UCHAR_MAX + 1] = {0};
    >
    >   const char *cur = bcs;
    >   int i = 0;
    >
    >   if(att[' '] != 32) /* do we need to set up the array? */
    >   {
    >     /* defaults */
    >     for(i = 0; i < UCHAR_MAX + 1; i++)
    >     {
    >       att = (char)i;
    >     }
    >
    >     /* known ASCII characters */
    >     i = 32;
    >     while(*cur != '\0')
    >     {
    >       att[*cur++] = i++;
    >     }
    >   }
    >
    >   while(*pos++ = att[*pc++])
    >   {
    >     continue;
    >   }
    >
    > }



    Very nice, the look-up method hadn't crossed my mind.

    If we can be sure that all characters will be valid ASCII characters
    then we can do the following:

    #include <string.h> /* strchr */
    #include <stdio.h> /* puts */

    typedef char OctetStorage;

    /*
     * MakeASCII - translate a native-character-set string into ASCII codes.
     *
     * pos  destination; receives one octet per source character followed
     *      by a terminating 0.  May be the same buffer as pc: each
     *      character is read before its slot is written.
     * pc   null-terminated source string.
     *
     * A character found in the table is replaced by 0x20 plus its position
     * in the table, i.e. its ASCII code.  The offset is the literal 0x20
     * rather than ' ', because ' ' has the native value of space, which
     * need not be 0x20.  A character that is not in the table is copied
     * through unchanged instead of doing arithmetic on a null pointer.
     */
    void MakeASCII(OctetStorage *pos,char const *pc)
    {
        static char const ascii[] =
            " !\"#$%&'()*+,-./0123456789:;<=>?@"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "[\\]^_`"
            "abcdefghijklmnopqrstuvwxyz"
            "{|}~";

        for ( ; *pc; ++pos, ++pc)
        {
            char const *hit = strchr(ascii, *pc);

            *pos = (hit != NULL) ? (OctetStorage)(hit - ascii + 0x20) : *pc;
        }

        *pos = 0;
    }

    /*
     * Demonstration: convert a short string in place (the same buffer is
     * passed as both destination and source) and print the result.
     */
    int main(void)
    {
        char text[] = "hello";

        MakeASCII(text, text);
        puts(text);

        return 0;
    }

    I wasn't sure whether I was able to replace:

    for ( ; *pc; ++pos, ++pc)
    *pos = strchr(ascii,*pc) - ascii + ' ';

    with:

    while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + ' ';

    I thought there might be a sequence point violation if pos and pc
    point to the same thing.. ?
    Tomás Ó hÉilidhe, Mar 28, 2008
    #3
  4. On Mar 28, 1:09 pm, Tomás Ó hÉilidhe <> wrote:

    >        *pos = strchr(ascii,*pc) - ascii + ' ';



    That space should of course be the ASCII value for space:

    *pos = strchr(ascii,*pc) - ascii + 0x20;
    Tomás Ó hÉilidhe, Mar 28, 2008
    #4
  5. Tomás Ó hÉilidhe said:

    <snip>
    >
    > int main(void)
    > {
    > char hello[] = "hello";
    >
    > MakeASCII(hello,hello);

    <snip>
    > I wasn't sure whether I was able to replace:
    >
    > for ( ; *pc; ++pos, ++pc)
    > *pos = strchr(ascii,*pc) - ascii + ' ';


    Don't add ' ' if you really want to add 32 and may be running on a
    non-ASCII system! Add 32 instead (or ASCII_BASE, #defined to 32, or
    something like that). Also, be absolutely sure that there is no
    possibility of strchr(ascii, *pc) returning NULL!

    >
    > with:
    >
    > while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + ' ';
    >
    > I thought there might be a sequence point violation if pos and pc
    > point to the same thing.. ?


    It's a valid point. If there is a risk of that, then make sure that the
    increments occur separately:

    while (*pc)
    {
    *pos = strchr(ascii,*pc) - ascii + ASCII_BASE;
    ++pos;
    ++pc;
    }

    --
    Richard Heathfield <http://www.cpax.org.uk>
    Email: -http://www. +rjh@
    Google users: <http://www.cpax.org.uk/prg/writings/googly.php>
    "Usenet is a strange place" - dmr 29 July 1999
    Richard Heathfield, Mar 28, 2008
    #5
  6. Tomás Ó hÉilidhe

    Willem Guest

    Richard Heathfield wrote:
    ) Tomás Ó hÉilidhe said:
    )> with:
    )>
    )> while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + ' ';
    )>
    )> I thought there might be a sequence point violation if pos and pc
    )> point to the same thing.. ?
    )
    ) It's a valid point. If there is a risk of that, then make sure that the
    ) increments occur separately:

    I disagree. While pos and pc may point to the same thing, it's not
    the thing that is pointed to that gets incremented.


    SaSW, Willem
    --
    Disclaimer: I am in no way responsible for any of the statements
    made in the above text. For all I know I might be
    drugged or something..
    No I'm not paranoid. You all think I'm paranoid, don't you !
    #EOT
    Willem, Mar 28, 2008
    #6
  7. Richard Heathfield wrote:
    > Tomás Ó hÉilidhe said:
    >> with:
    >>
    >> while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + ' ';
    >>
    >> I thought there might be a sequence point violation if pos and pc
    >> point to the same thing.. ?

    >
    > It's a valid point. If there is a risk of that, then make sure that the
    > increments occur separately:
    >
    > while (*pc)
    > {
    > *pos = strchr(ascii,*pc) - ascii + ASCII_BASE;
    > ++pos;
    > ++pc;
    > }


    I disagree that there is a risk of UB. If we give the object which pos
    and pc both point to a name "obj", then the statement in question has
    three effects:
    obj = strchr(ascii,obj) - ascii + ' ';
    pos++;
    pc++;
    I don't see any problem in all these effects occurring in the same
    expression. No object is written to and read from in the same expression
    except obj, and it is only read to determine the new value of obj, which
    is allowed.

    The apparent similarity of the original statement

    *pos++ = strchr(ascii,*pc++) - ascii + ' ';

    to statements such as

    a = i++;

    is purely coincidental, because in the latter, the postincrement applies
    to an object which is referenced elsewhere in the same expression,
    whereas in the former, it is not. The pointer is incremented, not the
    pointee.

    In fact it is much more similar to

    *to++ = *from++;

    which is valid even if from == to.
    Philip Potter, Mar 28, 2008
    #7
  8. Willem said:

    > Richard Heathfield wrote:
    > ) Tomás Ó hÉilidhe said:
    > )> with:
    > )>
    > )> while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + ' ';
    > )>
    > )> I thought there might be a sequence point violation if pos and pc
    > )> point to the same thing.. ?
    > )
    > ) It's a valid point. If there is a risk of that, then make sure that the
    > ) increments occur separately:
    >
    > I disagree. While pos and pc may point to the same thing, it's not
    > the thing that is pointed to that gets incremented.


    Ha! Let me think about this for a moment, before I do a second about-turn
    in the space of three articles.

    WLOG we can reduce the expression to *pos++ = *pc++. pos and pc point to
    the same thing but, as you say, are different objects. The object whose
    value is being retrieved for the purpose of determining the value to be
    stored is not itself being modified at all except via the assignment, and
    in that respect is equivalent to x = x, which we all know is legal.

    So yes, you're right, and the squeal of burning rubber is heard once more
    in the land.

    --
    Richard Heathfield <http://www.cpax.org.uk>
    Email: -http://www. +rjh@
    Google users: <http://www.cpax.org.uk/prg/writings/googly.php>
    "Usenet is a strange place" - dmr 29 July 1999
    Richard Heathfield, Mar 28, 2008
    #8
  9. Tomás Ó hÉilidhe

    Chris Dollin Guest

    Willem wrote:

    > Richard Heathfield wrote:
    > ) Tomás Ó hÉilidhe said:
    > )> with:
    > )>
    > )> while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + ' ';
    > )>
    > )> I thought there might be a sequence point violation if pos and pc
    > )> point to the same thing.. ?
    > )
    > ) It's a valid point. If there is a risk of that, then make sure that the
    > ) increments occur separately:
    >
    > I disagree. While pos and pc may point to the same thing, it's not
    > the thing that is pointed to that gets incremented.


    (if pc == pos ...)

    The same location is being written to and read from, and the read isn't
    just to determine the value to be written (in a strict interpretation;
    I'm sure we've had /that/ discussion before); but there is a sequence
    point intervening, unless `strchr` might be a macro, but if it were there
    could be a guarantee that it respected sequence points ...

    My heads hurt.

    This looks like a suitably horrible piece of avoidance:

    while (*pc++) *pos++ = strchr( ascii, pc[-1] ) - ascii + ' ';

    I propose that the expression `E*` be introduced as meaning `(E)[-1]`
    to make this easier to type: postfix-* is the decreference operator,
    and is nicely compaqt. Happy?

    --
    "I know it was late, but Mountjoy never bothers, /Archer's Goon/
    so long as it's the full two thousand words."

    Hewlett-Packard Limited Cain Road, Bracknell, registered no:
    registered office: Berks RG12 1HN 690597 England
    Chris Dollin, Mar 28, 2008
    #9
  10. Tomás Ó hÉilidhe

    Willem Guest

    Richard wrote:
    ) Ha! Let me think about this for a moment, before I do a second about-turn
    ) in the space of three articles.
    )
    ) WLOG we can reduce the expression to *pos++ = *pc++. pos and pc point to
    ) the same thing but, as you say, are different objects. The object whose
    ) value is being retrieved for the purpose of determining the value to be
    ) stored is not itself being modified at all except via the assignment, and
    ) in that respect is equivalent to x = x, which we all know is legal.

    Ah, but wait!
    What if *pos == pc ?
    That is, pos points to the location of pc ?
    Is that even possible ?


    SaSW, Willem
    --
    Disclaimer: I am in no way responsible for any of the statements
    made in the above text. For all I know I might be
    drugged or something..
    No I'm not paranoid. You all think I'm paranoid, don't you !
    #EOT
    Willem, Mar 28, 2008
    #10
  11. Tomás Ó hÉilidhe

    Guest

    On Mar 28, 4:13 pm, Willem <> wrote:
    > Richard wrote:
    >
    > ) Ha! Let me think about this for a moment, before I do a second about-turn
    > ) in the space of three articles.
    > )
    > ) WLOG we can reduce the expression to *pos++ = *pc++. pos and pc point to
    > ) the same thing but, as you say, are different objects. The object whose
    > ) value is being retrieved for the purpose of determining the value to be
    > ) stored is not itself being modified at all except via the assignment, and
    > ) in that respect is equivalent to x = x, which we all know is legal.
    >
    > Ah, but wait!
    > What if *pos == pc ?
    > That is, pos points to the location of pc ?
    > Is that even possible ?

    Not if pos and pc are the same type (unless both are void *, but
    then what they point to cannot be evaluated).
    However, even assuming pos = &pc, the objects that are modified in *pos
    ++ = .. *pc++ are 'pos' and 'pc' and not '*pos' nor '*pc'.
    So, even with your assumption, it is still valid.
    , Mar 28, 2008
    #11
  12. Tomás Ó hÉilidhe

    Richard Guest

    Willem <> writes:

    > Richard wrote:
    > ) Ha! Let me think about this for a moment, before I do a second about-turn
    > ) in the space of three articles.
    > )
    > ) WLOG we can reduce the expression to *pos++ = *pc++. pos and pc point to
    > ) the same thing but, as you say, are different objects. The object whose
    > ) value is being retrieved for the purpose of determining the value to be
    > ) stored is not itself being modified at all except via the assignment, and
    > ) in that respect is equivalent to x = x, which we all know is legal.
    >
    > Ah, but wait!
    > What if *pos == pc ?
    > That is, pos points to the location of pc ?


    Except that is not it. *pos is the value of pc. pos points to the
    location of a value equal to that of pc.

    > Is that even possible ?
    >
    >
    > SaSW, Willem
    Richard, Mar 28, 2008
    #12
  13. Willem said:

    > Richard wrote:
    > ) Ha! Let me think about this for a moment, before I do a second
    > about-turn ) in the space of three articles.
    > )
    > ) WLOG we can reduce the expression to *pos++ = *pc++. pos and pc point
    > to ) the same thing but, as you say, are different objects. The object
    > whose ) value is being retrieved for the purpose of determining the value
    > to be ) stored is not itself being modified at all except via the
    > assignment, and ) in that respect is equivalent to x = x, which we all
    > know is legal.
    >
    > Ah, but wait!
    > What if *pos == pc ?
    > That is, pos points to the location of pc ?
    > Is that even possible ?


    Um, yes, it's possible, via a cast. But if it does, then changing *pos
    (which is pointing to a pointer but is of type unsigned char *) is writing
    to the pointer itself (pc), in which case the code is totally screwed
    anyway. What price the pointer value after the update?

    But I don't think it reasonable to impose on this function the burden of
    avoiding utter stupidity in the caller. :)

    --
    Richard Heathfield <http://www.cpax.org.uk>
    Email: -http://www. +rjh@
    Google users: <http://www.cpax.org.uk/prg/writings/googly.php>
    "Usenet is a strange place" - dmr 29 July 1999
    Richard Heathfield, Mar 28, 2008
    #13
  14. Richard Heathfield:

    > The object whose
    > value is being retrieved for the purpose of determining the
    > value to be stored is not itself being modified at all except via > the assignment, and in that respect is equivalent to x = x,
    > which we all know is legal.



    With less competent programmers, you'll see that they avoid
    certain programming techniques and constructs because they doubt their
    own competency too much. You'll see them shy away from doing things
    like using pointers to iterate thru array elements in a loop. If you
    listen to comp.lang.c++ for twenty minutes, they'll constantly tell
    you how "dangerous" it is to be using "raw pointers".

    Now I've always been *against* this whole incompetency plea thing,
    but I must admit that *this* is the individual single sole part of the
    C programming language where I allow my own doubts over my own
    competency to reshape the way I write code. That is to say, I'll
    *always* have:

    for ( ; *pc; ++pos, ++pc) *pos = strchr(ascii,*pc) - ascii + 0x20;

    instead of:

    while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + 0x20;

    because I don't want to risk the chance of getting it wrong. This is
    the one place where I actually think I should just play it safe. And
    why am I so frightful? Well I had a program one time that worked
    PERFECTLY on numerous different systems until I decided to enable the
    compiler optimiser. All of a sudden, the program gave different
    output. Of course my first assumption was that the compiler had a
    dodgy optimiser... but anyway I went thru the code -- code which I
    thought had been bullet-proof -- to find the problem. Here was the
    culprit:

    /*
     * StrToLower - intended to lower-case the string at p in place; the
     * loop is written to stop once the stored character is 0.
     *
     * NOTE(review): the loop expression modifies p (p++ on the left of the
     * assignment) and also reads p (*p in the argument to tolower) with no
     * sequence point ordering the two, so the behaviour is undefined.
     * Whether *p is read before or after the increment is up to the
     * compiler and may differ between builds.
     */
    void StrToLower(char *p)
    {
    /* cast to unsigned char so that tolower never receives a negative value */
    while ( *p++ = tolower( (char unsigned)*p ) );
    }

    I'll *never* make that mistake again.
    Tomás Ó hÉilidhe, Mar 28, 2008
    #14
  15. Tomás Ó hÉilidhe

    CBFalconer Guest

    Tomás Ó hÉilidhe wrote:
    >

    .... snip ...
    >
    > Now I've always been *against* this whole incompetency plea thing,
    > but I must admit that *this* is the individual single sole part of
    > the C programming language where I allow my own doubts over my own
    > competency to reshape the way I write code. That is to say, I'll
    > *always* have:
    >
    > for ( ; *pc; ++pos, ++pc) *pos = strchr(ascii,*pc) - ascii + 0x20;
    >
    > instead of:
    >
    > while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + 0x20;


    IMO you are making a mistake. The second is simpler, and much
    easier to detect inaccuracies in. The thing that screams at you is
    that both are missing copying the terminal '\0'. Both are missing
    the initialization of pc and pos. Both are missing handling the
    fact that the char is not found in the ascii string.

    --
    [mail]: Chuck F (cbfalconer at maineline dot net)
    [page]: <http://cbfalconer.home.att.net>
    Try the download section.



    --
    Posted via a free Usenet account from http://www.teranews.com
    CBFalconer, Mar 28, 2008
    #15
  16. Tomás Ó hÉilidhe

    Morris Dovey Guest

    Tomás Ó hÉilidhe wrote:

    I think you'd want your character translation logic to look like:

    /*
     * make_ascii - table-driven translation sketch: each character of s,
     * including the terminating 0, is used as an index into x[] and the
     * table entry is stored to d.
     *
     * d  destination buffer.
     * s  null-terminated source string.
     *
     * NOTE(review): as written, x[] is a placeholder with a single element,
     * so any non-zero source character indexes past its end.  The table
     * must be given an entry for every index the cast below can produce
     * before this can be used.
     * NOTE(review): the cast is to unsigned, not unsigned char; where plain
     * char is signed, a negative character converts to a very large index.
     */
    void make_ascii(char *d,char *s)
    { static char x[] = { 0 }; /* ? */ /* placeholder: real table contents not given */
    /* translate the current character, then test it, so the 0 is translated too */
    do *d++ = x[(unsigned)*s];
    while (*s++);
    }

    which reduces the problem to appropriately initializing x[]. That
    initialization could be done at runtime, but would make more
    sense to do at compile time. One runtime approach might be:

    x[' '] = 0x20;
    x['!'] = 0x21;
    :
    :
    x['~'] = 0x7E;

    but my own preference would be to provide the initialization at
    compile time.

    --
    Morris Dovey
    DeSoto Solar
    DeSoto, Iowa USA
    http://www.iedu.com/DeSoto/
    Morris Dovey, Mar 28, 2008
    #16
  17. CBFalconer:

    > > for ( ; *pc; ++pos, ++pc) *pos = strchr(ascii,*pc) - ascii + 0x20;

    >
    > > instead of:

    >
    > > while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + 0x20;

    >
    > IMO you are making a mistake.  The second is simpler, and much
    > easier to detect inaccuracies in.  The thing that screams at you is
    > that both are missing copying the terminal '\0'.  Both are missing
    > the initialization of pc and pos.  Both are missing handling the
    > fact that the char is not found in the ascii string.



    They're missing none of those three things. The context of the code
    is as follows:

    static char const ascii[] =
    " !\"#$%&'()*+,-./0123456789:;<=>?@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "[\\]^_`"
    "abcdefghijklmnopqrstuvwxyz"
    "{|}~";

    for ( ; *pc; ++pos, ++pc)
    *pos = strchr(ascii,*pc) - ascii + ' ';

    *pos = 0;

    Also, it is assumed that every char is valid ASCII.
    Tomás Ó hÉilidhe, Mar 29, 2008
    #17
  18. Tomás Ó hÉilidhe <> wrote:
    >         case '$': *pos = 0x24u; break;
    > case '@': *pos = 0x40u; break;


    $ and @ are not guaranteed members of implementation
    source or execution character sets.

    --
    Peter
    Peter Nilsson, Mar 29, 2008
    #18
  19. Tomás Ó hÉilidhe

    CBFalconer Guest

    Tomás Ó hÉilidhe wrote:
    > CBFalconer:
    >
    >>> for ( ; *pc; ++pos, ++pc) *pos = strchr(ascii,*pc) - ascii + 0x20;
    >>>
    >>> instead of:
    >>>
    >>> while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + 0x20;

    >>
    >> IMO you are making a mistake. The second is simpler, and much
    >> easier to detect inaccuracies in. The thing that screams at you is
    >> that both are missing copying the terminal '\0'. Both are missing
    >> the initialization of pc and pos. Both are missing handling the
    >> fact that the char is not found in the ascii string.

    >
    > They're missing neither of those three things. The context of the code
    > is as follows:
    >
    > static char const ascii[] =
    > " !\"#$%&'()*+,-./0123456789:;<=>?@"
    > "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    > "[\\]^_`"
    > "abcdefghijklmnopqrstuvwxyz"
    > "{|}~";
    >
    > for ( ; *pc; ++pos, ++pc)
    > *pos = strchr(ascii,*pc) - ascii + ' ';
    >
    > *pos = 0;
    >
    > Also, it is assumed that every char is valid ASCII.


    Which is a silly assumption. However, you are still failing to
    initialize pc and pos.

    --
    [mail]: Chuck F (cbfalconer at maineline dot net)
    [page]: <http://cbfalconer.home.att.net>
    Try the download section.



    --
    Posted via a free Usenet account from http://www.teranews.com
    CBFalconer, Mar 29, 2008
    #19
  20. Tomás Ó hÉilidhe

    santosh Guest

    CBFalconer wrote:

    > Tomás Ó hÉilidhe wrote:
    >> CBFalconer:
    >>
    >>>> for ( ; *pc; ++pos, ++pc) *pos = strchr(ascii,*pc) - ascii + 0x20;
    >>>>
    >>>> instead of:
    >>>>
    >>>> while (*pc) *pos++ = strchr(ascii,*pc++) - ascii + 0x20;
    >>>
    >>> IMO you are making a mistake. The second is simpler, and much
    >>> easier to detect inaccuracies in. The thing that screams at you is
    >>> that both are missing copying the terminal '\0'. Both are missing
    >>> the initialization of pc and pos. Both are missing handling the
    >>> fact that the char is not found in the ascii string.

    >>
    >> They're missing neither of those three things. The context of the
    >> code is as follows:
    >>
    >> static char const ascii[] =
    >> " !\"#$%&'()*+,-./0123456789:;<=>?@"
    >> "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    >> "[\\]^_`"
    >> "abcdefghijklmnopqrstuvwxyz"
    >> "{|}~";
    >>
    >> for ( ; *pc; ++pos, ++pc)
    >> *pos = strchr(ascii,*pc) - ascii + ' ';
    >>
    >> *pos = 0;
    >>
    >> Also, it is assumed that every char is valid ASCII.

    >
    > Which is a silly assumption. However, you are still failing to
    > initialize pc and pos.


    They are initialised on entry to the function. Please read the previous
    articles before coming to conclusions.
    santosh, Mar 29, 2008
    #20
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. jt
    Replies:
    2
    Views:
    874
    upashu2
    Jul 14, 2005
  2. John Gregory
    Replies:
    0
    Views:
    294
    John Gregory
    Jul 5, 2009
  3. Alextophi
    Replies:
    8
    Views:
    501
    Alan J. Flavell
    Dec 30, 2005
  4. Bart Vandewoestyne
    Replies:
    8
    Views:
    725
    Bart Vandewoestyne
    Sep 25, 2012
  5. bruce
    Replies:
    38
    Views:
    265
    Mark Lawrence
    Nov 1, 2013
Loading...

Share This Page