Hash table implementation.

Discussion in 'C Programming' started by lolzy@live.nl, Aug 11, 2011.

  1. Guest

    Hello comp.lang.c,

    This is a hash table implementation I wrote, please comment. Thanks in
    advance.


    # START CODE

    /*
    * Hash table demo.
    * (C) Jori Koolstra - 19:40 11-8-2011
    */


    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>


    #define HASH_TABLE_SIZE 100


    #define HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(t, n) if
    (hash_table_delete_element(t, n) == 0) hash_table_fatal_error(0, n)

    #define HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(t, n, z) if
    ((z = hash_table_get_string_from_table(t, n)) == NULL)
    hash_table_fatal_error(1, n)


    struct table_data
    {
    char *uid;
    char *d;
    };


    typedef struct table_data table_data;



    struct hash_table
    {
    table_data data;
    struct hash_table *x;
    };


    typedef struct hash_table hash_table;



    hash_table ** hash_table_create_table(int);
    hash_table * hash_table_get_string_from_table(hash_table **, char *);
    int hash_table_hash(char *);
    void hash_table_add_element(hash_table **, char *, char *);
    int hash_table_delete_element(hash_table **, char *);
    void hash_table_fatal_error(int, char *);



    int main(void)
    {
    hash_table **table;
    hash_table *y;

    table = hash_table_create_table(HASH_TABLE_SIZE);

    hash_table_add_element(table, "John", "Hello John!");
    hash_table_add_element(table, "Kim", "Hello Kim!");
    hash_table_add_element(table, "Jori", "Hello Jori!");
    hash_table_add_element(table, "Jack", "Hello Jack!");

    HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(table, "Kim");

    HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(table, "Jack", y);

    printf("%s\n", y->data.d);




    return 0;
    }



    /*
    * Print error and quit.
    */
    void hash_table_fatal_error(int t, char *el)
    {
    switch (t)
    {
    case 0:
    printf("ERROR: Could not delete element '%s' from hash table.\n",
    el);
    break;
    case 1:
    printf("ERROR: Could not find key '%s' in hash table.\n", el);
    break;
    }


    exit(0);
    }



    /*
    * Create a hash table with size 'size' and return a hash table array.
    */
    hash_table ** hash_table_create_table(int size)
    {
    register int i = 0;
    hash_table **x;


    x = (hash_table **) calloc(size, sizeof(hash_table *));

    for (; i <= size; ++i)
    {
    x = (hash_table *) calloc(1, sizeof(hash_table));
    }

    return x;
    }



    /*
    * Returns a specified hash table element from table 't' with key 's',
    * or NULL on failure.
    */
    hash_table * hash_table_get_string_from_table(hash_table **t, char *s)
    {
    int id = hash_table_hash(s);
    hash_table *q;

    if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, s) != 0)
    {
    if (t[id]->x != NULL)
    {
    /* Move trough linked list */
    for (q = t[id]->x; ; q = q->x)
    {
    if (strcmp(q->data.uid, s) == 0)
    {
    /* Found */
    return q;
    }


    if (q->x == NULL)
    {
    break;
    }
    }
    }

    return NULL;
    }

    return (t[id]->data.uid == NULL ? NULL : t[id]);
    }



    /*
    * Add 'data' to table 't' with key 'uid'.
    */
    void hash_table_add_element(hash_table **t, char *uid, char *data)
    {
    int hash = hash_table_hash(uid);
    hash_table *q = (hash_table *) calloc(1, sizeof(hash_table));
    hash_table *c;

    q->data.uid = strdup(uid);
    q->data.d = strdup(data);
    q->x = NULL;


    c = t[hash];


    if (c->data.uid != NULL)
    {
    while (c->x != NULL)
    {
    c = c->x;
    }

    c->x = q;
    }
    else
    {
    t[hash]->data.uid = strdup(uid);
    t[hash]->data.d = strdup(data);
    }
    }



    /*
    * Delete element identified by 'uid' from hash table 't'.
    * Returns: 1 on success, 0 on failure.
    */
    int hash_table_delete_element(hash_table **t, char *uid)
    {
    int id = hash_table_hash(uid);
    hash_table *q;


    if (t[id]->data.uid == NULL)
    {
    return 0;
    }


    if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, uid) != 0)
    {
    /* Move trough linked list */
    for (q = t[id]; ; q = q->x)
    {
    if (strcmp(q->x->data.uid, uid) == 0)
    {
    if (q->x->x != NULL)
    {
    free(q->x);
    q->x = q->x->x;
    }
    else
    {
    free(q->x);
    q->x = NULL;
    }

    return 1;
    }


    if (q->x == NULL)
    {
    break;
    }
    }

    return 0;
    }


    if (t[id]->x == NULL)
    {
    t[id]->data.uid = NULL;
    }
    else
    {
    t[id]->data.uid = t[id]->x->data.uid;
    t[id]->data.d = t[id]->x->data.d;
    t[id]->x = t[id]->x->x;
    }

    return 1;
    }




    /***** NOT UNDER MY COPYRIGHT *****/
    /*
    * UNIX ELF hash.
    * Published hash algorithm used in the UNIX ELF format for object
    files.
    */
    int hash_table_hash(char *data)
    {
    int h = 0, g;

    while (*data)
    {
    h = (h << 4) + *data++;

    if (g = h & 0xF0000000)
    {
    h ^= g >> 24;
    }

    h &= ~g;
    }

    return h % HASH_TABLE_SIZE;
    }

    # END CODE
     
    , Aug 11, 2011
    #1
    1. Advertising

  2. Stefan Ram Guest

    writes:
    >x = (hash_table **) calloc(size, sizeof(hash_table *));
    > for (; i <= size; ++i)
    > {
    > x = (hash_table *) calloc(1, sizeof(hash_table));


    x might be 0 here.
     
    Stefan Ram, Aug 11, 2011
    #2
    1. Advertising

  3. -berlin.de (Stefan Ram) writes:
    > writes:
    >>x = (hash_table **) calloc(size, sizeof(hash_table *));
    >> for (; i <= size; ++i)
    >> {
    >> x = (hash_table *) calloc(1, sizeof(hash_table));

    >
    > x might be 0 here.


    And if x isn't null, the elements of x might not be null pointers;
    there's no guarantee that a null pointer is represented as
    all-bits-zero. If you're satisfied with portability only to
    systems where null pointers *are* all-bits-zero, you can do that,
    but I'd suggest at least documenting it. Or you can use malloc()
    and then set each element to NULL in a loop.

    --
    Keith Thompson (The_Other_Keith) <http://www.ghoti.net/~kst>
    Nokia
    "We must do something. This is something. Therefore, we must do this."
    -- Antony Jay and Jonathan Lynn, "Yes Minister"
     
    Keith Thompson, Aug 11, 2011
    #3
  4. Keith Thompson <> wrote:

    > -berlin.de (Stefan Ram) writes:
    >> writes:
    >>>x = (hash_table **) calloc(size, sizeof(hash_table *));
    >>> for (; i <= size; ++i)
    >>> {
    >>> x = (hash_table *) calloc(1, sizeof(hash_table));

    >>
    >> x might be 0 here.

    >
    > And if x isn't null, the elements of x might not be null pointers;


    You are right, at the place marked "here" for a non-null x these
    pointers are either null pointers or point to an object
    sufficiently large to hold an instance of hash_table.

    -- Ralf
     
    Ralf Damaschke, Aug 11, 2011
    #4
  5. Ike Naar Guest

    On 2011-08-11, <> wrote:
    > hash_table ** hash_table_create_table(int size)
    > {
    > register int i = 0;


    It's probably a good idea to drop the 'register' qualification
    and leave this kind of optimization to the compiler.
    I'd prefer to make 'size' and 'i' of type size_t.

    > hash_table **x;
    >
    >
    > x = (hash_table **) calloc(size, sizeof(hash_table *));


    The cast is unnecessary; the recommended idiom is

    x = calloc(size, sizeof *x);

    calloc can fail, and return NULL; you should check for that.
    It's not necessary to set the elements of the array to all-bits-zero,
    because you immediately overwrite each element in the for loop below.
    So you might as well allocate the array using

    x = malloc(size * sizeof *x);

    > for (; i <= size; ++i)
    > {
    > x = (hash_table *) calloc(1, sizeof(hash_table));
    > }


    This looks like an off-by-error; x has only 'size' elements,
    numbered 0 to size-1. The last iteration of the for loop assigns
    to the nonexisting element x[size].
    >
    > return x;
    > }
     
    Ike Naar, Aug 11, 2011
    #5
  6. Guest

    Thanks for all your comments. I still have some questions :) :

    * Why do you prefer malloc() over calloc() ?
    * Why not cast for C++ compatibility?
    * Why shouldn't I use the register keyword for loop variables?
     
    , Aug 12, 2011
    #6
  7. Guest

    I took a look at the code, and I optimized it to:
    (Still looking for answers for my question 1 post up)
    Thanks in advance.

    /*
    * Hash table demo.
    * (C) Jori Koolstra - 19:40 11-8-2011
    */


    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>


    #define HASH_TABLE_SIZE 100


    #define HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(t, n) if
    (hash_table_delete_element(t, n) == 0) hash_table_fatal_error(0, n)

    #define HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(t, n, z) if
    ((z = hash_table_get_string_from_table(t, n)) == NULL)
    hash_table_fatal_error(1, n)


    struct table_data
    {
    char *uid;
    char *d;
    };


    typedef struct table_data table_data;



    struct hash_table
    {
    table_data data;
    struct hash_table *x;
    };


    typedef struct hash_table hash_table;



    hash_table ** hash_table_create_table(int);
    hash_table * hash_table_get_string_from_table(hash_table **, char *);
    int hash_table_hash(char *);
    void hash_table_add_element(hash_table **, char *, char *);
    int hash_table_delete_element(hash_table **, char *);
    void hash_table_fatal_error(int, char *);
    void * hash_table_safe_calloc(size_t, size_t);


    int main(void)
    {
    hash_table **table;
    hash_table *y;

    table = hash_table_create_table(HASH_TABLE_SIZE);

    hash_table_add_element(table, "John", "Hello John!");
    hash_table_add_element(table, "Kim", "Hello Kim!");
    hash_table_add_element(table, "Jori", "Hello Jori!");
    hash_table_add_element(table, "Jack", "Hello Jack!");

    HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(table, "Kim");

    HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS(table, "Jack", y);

    printf("%s\n", y->data.d);




    return 0;
    }



    /*
    * Print error and quit.
    */
    void hash_table_fatal_error(int t, char *el)
    {
    switch (t)
    {
    case 0:
    printf("ERROR: Could not delete element '%s' from hash table.\n",
    el);
    break;
    case 1:
    printf("ERROR: Could not find key '%s' in hash table.\n", el);
    break;
    case 2:
    printf("ERROR: Could not allocate memory.\n");
    break;
    }


    exit(0);
    }



    /*
    * Create a hash table with size 'size' and return a hash table array.
    */
    hash_table ** hash_table_create_table(int size)
    {
    register int i = 0;
    hash_table **x;


    x = (hash_table **) hash_table_safe_calloc(size, sizeof(hash_table
    *));

    for (; i < size; ++i)
    {
    x = (hash_table *) hash_table_safe_calloc(1, sizeof(hash_table));
    }

    return x;
    }



    /*
    * Returns a specified hash table element from table 't' with key 's',
    * or NULL on failure.
    */
    hash_table * hash_table_get_string_from_table(hash_table **t, char *s)
    {
    hash_table *q;

    for (q = t[hash_table_hash(s)]; q; q = q->x)
    {
    if (q->data.uid && 0 == strcmp(q->data.uid, s))
    {
    return q;
    }
    }

    return NULL;
    }



    /*
    * Add 'data' to table 't' with key 'uid'.
    */
    void hash_table_add_element(hash_table **t, char *uid, char *data)
    {
    int h = hash_table_hash(uid);
    hash_table *q, *c = t[h];


    if (c->data.uid != NULL)
    {
    q = (hash_table *) hash_table_safe_calloc(1, sizeof(hash_table));
    q->data.uid = strdup(uid);
    q->data.d = strdup(data);

    for (; c->x != NULL; c = c->x)

    c->x = q;
    }
    else
    {
    t[h]->data.uid = strdup(uid);
    t[h]->data.d = strdup(data);
    }
    }



    /*
    * Delete element identified by 'uid' from hash table 't'.
    * Returns: 1 on success, 0 on failure.
    */
    int hash_table_delete_element(hash_table **t, char *uid)
    {
    int h = hash_table_hash(uid);
    hash_table *q;


    if (t[h]->data.uid != NULL && strcmp(t[h]->data.uid, uid) != 0)
    {
    /* Move trough linked list */
    for (q = t[h]; q; q = q->x)
    {
    if (strcmp(q->x->data.uid, uid) == 0)
    {
    free(q->x);
    q->x = q->x->x;

    return 1;
    }
    }

    return 0;
    }


    if (t[h]->x == NULL)
    {
    if (t[h]->data.uid == NULL)
    {
    return 0;
    }
    else
    {
    t[h]->data.uid = NULL;
    }
    }
    else
    {
    t[h]->data.uid = t[h]->x->data.uid;
    t[h]->data.d = t[h]->x->data.d;
    t[h]->x = t[h]->x->x;
    }

    return 1;
    }



    /*
    * Allocate space with error checking.
    */
    void * hash_table_safe_calloc(size_t num, size_t size)
    {
    void *p;

    if ((p = calloc(num, size)) == NULL)
    {
    hash_table_fatal_error(2, "");
    }

    return p;
    }




    /***** NOT UNDER MY COPYRIGHT *****/
    /*
    * UNIX ELF hash.
    * Published hash algorithm used in the UNIX ELF format for object
    files.
    */
    int hash_table_hash(char *data)
    {
    int h = 0, g;

    while (*data)
    {
    h = (h << 4) + *data++;

    if (g = h & 0xF0000000)
    {
    h ^= g >> 24;
    }

    h &= ~g;
    }

    return h % HASH_TABLE_SIZE;
    }
     
    , Aug 12, 2011
    #7
  8. Nobody Guest

    On Thu, 11 Aug 2011 23:55:45 -0700, lolzy wrote:

    > Thanks for all your comments. I still have some questions :) :
    >
    > * Why do you prefer malloc() over calloc() ?


    calloc() zeros the allocated memory, which is inefficient if you don't
    actually need to do so.

    > * Why not cast for C++ compatibility?


    There are arguments for and against casting. The main argument against is
    that casting can prevent a diagnostic being issued if the prototype isn't
    in scope.

    > * Why shouldn't I use the register keyword for loop variables?


    At best, it's a waste of both keystrokes and bytes; the compiler will
    probably ignore it when deciding where to store the variable (i.e. the
    only effect of a "register" qualifier is likely to be that you get an
    error if you attempt to take the variable's address). At worst, the
    compiler might actually pay attention to it, and you get less efficient
    code than the compiler would have generated without it.

    The "register" qualifier originated in an era where compilers performed a
    fairly direct translation from source code to object code, without any
    significant optimisation. It is somewhere between useless and worse than
    useless with a modern compiler.
     
    Nobody, Aug 12, 2011
    #8
  9. One thing that you may want to do is to use the size as more of a suggestion than a firm number. Many hash functions work better when the main size isa prime number. You can pick a prime number that is close to the size requested or pick the first prime that is not less than the size requested. Primacy is not really a requirement when the size of the table gets really large. It would be exceedingly reasonable to keep a list of primes less than 256 and require that the main size have no factors less than 256. This wouldallow you easily to pick a prime number less 64K and give you a "size thatisn't completely stupid" for larger tables. For sizes less than 256, you just scan the prime table upward to find the first prime.

    If you don't always use a realy cool hash function, then I agree that you might like to use the "object inspired" approach of storing a function pointer to a hash function.
     
    Michael Angelo Ravera, Aug 12, 2011
    #9
  10. Ike Naar Guest

    On 2011-08-12, <> wrote:
    > * Why do you prefer malloc() over calloc() ?


    malloc allocates memory.
    calloc allocates memory and initializes it to all-bytes-zero.
    In your case, initializing the memory to all-bytes-zero is
    wasted effort since you initialize the memory yourself.

    > * Why not cast for C++ compatibility?


    You'll have to make up your mind if you want to write the program
    in C or in C++. They are different languages, and if you try to
    write code that is both C and C++ you may end up with something
    that is neither decent C nor decent C++.
    For instance, in a C++ program you would probably prefer std::vector
    over a malloc-ed array.

    > * Why shouldn't I use the register keyword for loop variables?


    Suppose the compiler honours the keyword, and reserves a register
    for the loop variable. That register now cannot be used for more
    useful purposes (e.g. as temporary storage for computing subexpressions)
    and you may end up with less efficient code.
     
    Ike Naar, Aug 12, 2011
    #10
  11. Eric Sosman Guest

    On 8/12/2011 2:55 AM, wrote:
    > Thanks for all your comments. I still have some questions :) :
    >
    > * Why do you prefer malloc() over calloc() ?


    This was explained elsethread.

    > * Why not cast for C++ compatibility?


    Why not use `#define STOP ;' and write STOP instead of ; at
    the end of each statement, so you can change it to `#define STOP .'
    for easier conversion to COBOL?

    > * Why shouldn't I use the register keyword for loop variables?


    Because the compilers know the trade-offs for their particular
    machines better than you know them for all machines.

    --
    Eric Sosman
    d
     
    Eric Sosman, Aug 12, 2011
    #11
  12. tom st denis Guest

    On Aug 12, 4:47 am, Nobody <> wrote:
    > On Thu, 11 Aug 2011 23:55:45 -0700, lolzy wrote:
    > > Thanks for all your comments. I still have some questions :) :

    >
    > > * Why do you prefer malloc() over calloc() ?

    >
    > calloc() zeros the allocated memory, which is inefficient if you don't
    > actually need to do so.


    Generally, if you're allocating memory for a struct that has pointers
    calloc() is your buddy since it'll also NULL the pointers [ya ya
    rabble rabble, it's portable, let it go]. If you have char arrays
    they're now NUL terminated, etc and so on...

    If you're just allocating a buffer and then going to immediately use
    it then malloc() is handy.

    Tom
     
    tom st denis, Aug 12, 2011
    #12
  13. Joe Pfeiffer Guest

    writes:

    > Thanks for all your comments. I still have some questions :) :
    >
    > * Why do you prefer malloc() over calloc() ?


    Slightly faster if you're going to initialize the space to anything but
    0.

    > * Why not cast for C++ compatibility?


    Because it isn't C++

    > * Why shouldn't I use the register keyword for loop variables?


    There's no real reason not to, but every compiler I'm familiar with
    ignores it so there's no real point.
     
    Joe Pfeiffer, Aug 12, 2011
    #13
  14. James Kuyper Guest

    On 08/12/2011 09:15 AM, tom st denis wrote:
    > On Aug 12, 4:47 am, Nobody <> wrote:
    >> On Thu, 11 Aug 2011 23:55:45 -0700, lolzy wrote:
    >>> Thanks for all your comments. I still have some questions :) :

    >>
    >>> * Why do you prefer malloc() over calloc() ?

    >>
    >> calloc() zeros the allocated memory, which is inefficient if you don't
    >> actually need to do so.

    >
    > Generally, if you're allocating memory for a struct that has pointers
    > calloc() is your buddy since it'll also NULL the pointers [ya ya
    > rabble rabble, it's portable, let it go].


    It will set all there bits zero, which is not necessarily a
    representation of a null pointer. So, no, it is NOT portable.

    Similarly, a floating point object with all bits zeros does not
    necessarily represent 0.0, though that does happen to be the case for
    most implementations.

    However, you're fine as long as you stick with arithmetic types.
     
    James Kuyper, Aug 12, 2011
    #14
  15. tom st denis <> writes:
    > On Aug 12, 4:47 am, Nobody <> wrote:
    >> On Thu, 11 Aug 2011 23:55:45 -0700, lolzy wrote:
    >> > Thanks for all your comments. I still have some questions :) :

    >>
    >> > * Why do you prefer malloc() over calloc() ?

    >>
    >> calloc() zeros the allocated memory, which is inefficient if you don't
    >> actually need to do so.

    >
    > Generally, if you're allocating memory for a struct that has pointers
    > calloc() is your buddy since it'll also NULL the pointers [ya ya
    > rabble rabble, it's portable, let it go]. If you have char arrays
    > they're now NUL terminated, etc and so on...


    No, I don't think I will let it go.

    The language does not guarantee that either null pointers or
    floating-point zero are represented as all-bits-zero.

    There wasn't even such a guarantee for integer types until *after*
    C99. (As of the C99 standard, an integer representation with padding
    bits could in principle require some of those bits to be 1, making
    all-bits-zero a trap representation; one of the technical corrigenda
    requires all-bits-zero to be a valid representation for 0.)

    I don't know of any implementation where all-bits-zero *isn't*
    the representation for null pointers, zero-valued integers,
    and zero-valued floating-point numbers. So if you use calloc()
    and assume that it sets your pointers to NULL, you can almost
    certainly get away with it. But your code won't be portable to
    any implementations that violate those assumptions.

    If you're ok with that, go ahead -- but at least add a comment
    exlaining the choice you've made.

    *Now* I'll let it go. :cool:}

    > If you're just allocating a buffer and then going to immediately use
    > it then malloc() is handy.


    Why allocate a buffer unless you're going to use it? There are
    cases where it's convenient to initialize some large data structure
    to all zeros, with the possibility that you'll actually use the zero
    values before writing some meaningful value. But another approach
    is for your program's logic to keep track of which elements you've
    set, and refer only to those elements' values. For example, if
    a char array is going to contain a string, you don't need to zero
    the whole thing; zeroing the first byte makes it an empty string.

    --
    Keith Thompson (The_Other_Keith) <http://www.ghoti.net/~kst>
    Nokia
    "We must do something. This is something. Therefore, we must do this."
    -- Antony Jay and Jonathan Lynn, "Yes Minister"
     
    Keith Thompson, Aug 12, 2011
    #15
  16. James Kuyper <> writes:
    > On 08/12/2011 09:15 AM, tom st denis wrote:
    >> On Aug 12, 4:47 am, Nobody <> wrote:
    >>> On Thu, 11 Aug 2011 23:55:45 -0700, lolzy wrote:
    >>>> Thanks for all your comments. I still have some questions :) :
    >>>
    >>>> * Why do you prefer malloc() over calloc() ?
    >>>
    >>> calloc() zeros the allocated memory, which is inefficient if you don't
    >>> actually need to do so.

    >>
    >> Generally, if you're allocating memory for a struct that has pointers
    >> calloc() is your buddy since it'll also NULL the pointers [ya ya
    >> rabble rabble, it's portable, let it go].

    >
    > It will set all there bits zero, which is not necessarily a
    > representation of a null pointer. So, no, it is NOT portable.
    >
    > Similarly, a floating point object with all bits zeros does not
    > necessarily represent 0.0, though that does happen to be the case for
    > most implementations.
    >
    > However, you're fine as long as you stick with arithmetic types.


    You're fine as long as you stick with *integer* types. (Floating-point
    types are arithmetic.)

    --
    Keith Thompson (The_Other_Keith) <http://www.ghoti.net/~kst>
    Nokia
    "We must do something. This is something. Therefore, we must do this."
    -- Antony Jay and Jonathan Lynn, "Yes Minister"
     
    Keith Thompson, Aug 12, 2011
    #16
  17. James Kuyper Guest

    On 08/12/2011 11:50 AM, Keith Thompson wrote:
    > James Kuyper <> writes:

    ....
    >> Similarly, a floating point object with all bits zeros does not
    >> necessarily represent 0.0, though that does happen to be the case for
    >> most implementations.
    >>
    >> However, you're fine as long as you stick with arithmetic types.

    >
    > You're fine as long as you stick with *integer* types. (Floating-point
    > types are arithmetic.)


    Yep, I thought about that, just 2 seconds after I sent the message.
     
    James Kuyper, Aug 12, 2011
    #17
  18. tom st denis Guest

    On Aug 12, 11:38 am, Keith Thompson <> wrote:
    > tom st denis <> writes:
    >
    > > On Aug 12, 4:47 am, Nobody <> wrote:
    > >> On Thu, 11 Aug 2011 23:55:45 -0700, lolzy wrote:
    > >> > Thanks for all your comments. I still have some questions :) :

    >
    > >> > * Why do you prefer malloc() over calloc() ?

    >
    > >> calloc() zeros the allocated memory, which is inefficient if you don't
    > >> actually need to do so.

    >
    > > Generally, if you're allocating memory for a struct that has pointers
    > > calloc() is your buddy since it'll also NULL the pointers [ya ya
    > > rabble rabble, it's portable, let it go].  If you have char arrays
    > > they're now NUL terminated, etc and so on...

    >
    > No, I don't think I will let it go.
    >
    > The language does not guarantee that either null pointers or
    > floating-point zero are represented as all-bits-zero.
    >
    > There wasn't even such a guarantee for integer types until *after*
    > C99.  (As of the C99 standard, an integer representation with padding
    > bits could in principle require some of those bits to be 1, making
    > all-bits-zero a trap representation; one of the technical corrigenda
    > requires all-bits-zero to be a valid representation for 0.)
    >
    > I don't know of any implementation where all-bits-zero *isn't*
    > the representation for null pointers, zero-valued integers,
    > and zero-valued floating-point numbers.  So if you use calloc()
    > and assume that it sets your pointers to NULL, you can almost
    > certainly get away with it.  But your code won't be portable to
    > any implementations that violate those assumptions.
    >
    > If you're ok with that, go ahead -- but at least add a comment
    > exlaining the choice you've made.
    >
    > *Now* I'll let it go.  :cool:}


    While I appreciate the indepth analysis I did put that aside in there
    BECAUSE I knew it wasn't 100% portable.

    But basically for most developers we have two alternatives

    1. calloc the struct and zero all pointers/etc thereby assuring that
    you have more coverage and NULL pointers that weren't initialized will
    be detected tout-suite

    or

    2. malloc the struct, try and cover all pointers/etc and run the risk
    of missing some that may just randomly behave [or worse, appear to]
    and thereby introduce security holes and uptime hazards...

    > > If you're just allocating a buffer and then going to immediately use
    > > it then malloc() is handy.

    >
    > Why allocate a buffer unless you're going to use it?  There are
    > cases where it's convenient to initialize some large data structure
    > to all zeros, with the possibility that you'll actually use the zero
    > values before writing some meaningful value.  But another approach
    > is for your program's logic to keep track of which elements you've
    > set, and refer only to those elements' values.  For example, if
    > a char array is going to contain a string, you don't need to zero
    > the whole thing; zeroing the first byte makes it an empty string.


    What I meant is something like

    char *p = malloc(10); // I need some bytes

    p[0] = somebuf[5];
    // etc and so on

    I always use calloc() [or equivs] when allocating structures ... I
    just like having known state buffers...

    Tom
     
    tom st denis, Aug 12, 2011
    #18
  19. James Kuyper Guest

    On 08/12/2011 01:17 PM, tom st denis wrote:
    > On Aug 12, 11:38 am, Keith Thompson <> wrote:
    >> tom st denis <> writes:

    ....
    >>> Generally, if you're allocating memory for a struct that has pointers
    >>> calloc() is your buddy since it'll also NULL the pointers [ya ya
    >>> rabble rabble, it's portable, let it go]. If you have char arrays
    >>> they're now NUL terminated, etc and so on...

    ....
    > While I appreciate the indepth analysis I did put that aside in there
    > BECAUSE I knew it wasn't 100% portable.


    Saying "it'll also NULL the pointers" and "it's portable" is not a good
    way of communicating the fact that you knew that it will sometimes fail
    to create null pointers, and that it's not always portable. You should
    be careful about saying things like that; it could give people the
    impression that you are unaware of the fact that there are real
    implementations where such code won't work.

    > But basically for most developers we have two alternatives
    >
    > 1. calloc the struct and zero all pointers/etc thereby assuring that
    > you have more coverage and NULL pointers that weren't initialized will
    > be detected tout-suite
    >
    > or
    >
    > 2. malloc the struct, try and cover all pointers/etc and run the risk
    > of missing some that may just randomly behave [or worse, appear to]
    > and thereby introduce security holes and uptime hazards...


    You've left out option 3, which is the one I use most often:

    3. malloc() the struct, and then fill in it's members only when I have
    actual values to fill them in with that I intend to use. Sometimes those
    will be the same values that would result from either
    zero-initialization, but often they will not be.

    Another technique I often use is as follows:

    static const struct my_struct empty;
    struct my_struct *p = malloc(*p);
    if(p)
    {
    *p = empty;
    }
    else
    {
    // error handling.
    }
     
    James Kuyper, Aug 12, 2011
    #19
  20. tom st denis Guest

    On Aug 12, 1:54 pm, James Kuyper <> wrote:
    > On 08/12/2011 01:17 PM, tom st denis wrote:
    >
    > > On Aug 12, 11:38 am, Keith Thompson <> wrote:
    > >> tom st denis <> writes:

    > ...
    > >>> Generally, if you're allocating memory for a struct that has pointers
    > >>> calloc() is your buddy since it'll also NULL the pointers [ya ya
    > >>> rabble rabble, it's portable, let it go].  If you have char arrays
    > >>> they're now NUL terminated, etc and so on...

    > ...
    > > While I appreciate the indepth analysis I did put that aside in there
    > > BECAUSE I knew it wasn't 100% portable.

    >
    > Saying "it'll also NULL the pointers" and "it's portable" is not a good
    > way of communicating the fact that you knew that it will sometimes fail
    > to create null pointers, and that it's not always portable. You should
    > be careful about saying things like that; it could give people the
    > impression that you are unaware of the fact that there are real
    > implementations where such code won't work.


    It's portable that in if you were born after the fall of the wall, and
    develop software used on machines after the fall of indoor smoking at
    shopping centres ... chances are highly good that you're on a machine
    with[out] a MMU where there are no tag bits, no funkyness, and
    pointers are just 16, 32, and/or 64 bit words where NULL is just all
    zeroes.

    For the 7 people out there who work on their PDP-11 slowly calculating
    the value of 'e' or whatever you do with those machines ... they can
    take heed of your warning.

    ISO C would do well by just dropping such nonsense...


    > > But basically for most developers we have two alternatives

    >
    > > 1.  calloc the struct and zero all pointers/etc thereby assuring that
    > > you have more coverage and NULL pointers that weren't initialized will
    > > be detected tout-suite

    >
    > > or

    >
    > > 2.  malloc the struct, try and cover all pointers/etc and run the risk
    > > of missing some that may just randomly behave [or worse, appear to]
    > > and thereby introduce security holes and uptime hazards...

    >
    > You've left out option 3, which is the one I use most often:
    >
    > 3. malloc() the struct, and then fill in it's members only when I have
    > actual values to fill them in with that I intend to use. Sometimes those
    > will be the same values that would result from either
    > zero-initialization, but often they will not be.


    Sometimes you initialize [or alloc] a structure in one routine and
    don't fill in all of the values [or can't just yet] but it's later
    used by code that CAN make use of the additional members. So not
    having a default value in the pointer means that it creates UB.

    > Another technique I often use is as follows:
    >
    >     static const struct my_struct empty;
    >     struct my_struct *p = malloc(*p);
    >     if(p)
    >     {
    >          *p = empty;
    >     }
    >     else
    >     {
    >          // error handling.
    >     }


    typo aside (not pointing out because I'm not a jackass, I'll assume
    you know C hehehehe), that is nice and portable, and sure if people
    used that all the power to them. It does waste a bit of BSS segment
    space though.

    All I'm advocating is that if people cut corners they cut them so
    they're less likely to cause UB than more. Your last solution is the
    most ideal of them all [that you presented] but I'm still ok with just
    simply using calloc() over malloc() if all else fails.

    Tom
     
    tom st denis, Aug 12, 2011
    #20
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Diane
    Replies:
    7
    Views:
    4,859
    Roland Pibinger
    Apr 4, 2006
  2. Ryan Kaskel
    Replies:
    1
    Views:
    579
    Ryan Kaskel
    Apr 26, 2006
  3. Jim Cobban
    Replies:
    8
    Views:
    563
    Jerry Coffin
    Apr 17, 2008
  4. rp
    Replies:
    1
    Views:
    562
    red floyd
    Nov 10, 2011
  5. Srijayanth Sridhar
    Replies:
    19
    Views:
    655
    David A. Black
    Jul 2, 2008
Loading...

Share This Page