Speaking of selection algorithms, I had never heard of heapselect, so I wrote one.

Discussion in 'C Programming' started by user923005, Jun 6, 2007.

  1. user923005

    user923005 Guest

    /*
    Tragically, I have bollixed it up badly. It's such a simple
    algorithm, but somehow I've gone off, because it does not always
    work. Any ideas where I went off?
    */

    #include <stdlib.h>
    #include <string.h>
    #include <errno.h>

    /*
    We will swap sizeof(long) bytes at a time if possible (if the data is
    long aligned),
    else a char at a time.
    */
    static void swapfunc(char *a, char *b, const size_t e_size, const
    int swaptype)
    {
    if (swaptype <= 1) {
    size_t i = (e_size) / sizeof(long);
    long *pi = (long *) (void *) a;
    long *pj = (long *) (void *) b;
    do {
    const long t = *pi;
    *pi++ = *pj;
    *pj++ = t;
    } while (--i > 0);
    } else {
    size_t i = (e_size);
    char *pi = (char *) (void *) a;
    char *pj = (char *) (void *) b;
    do {
    const char t = *pi;
    *pi++ = *pj;
    *pj++ = t;
    } while (--i > 0);
    }
    }
    /*
    Citation: http://www.ics.uci.edu/~eppstein/161/960125.html

    This beautiful algorithm:

    heapselect(L,k)
    {
    heap H = heapify(L)
    for (i = 1; i < k; i++) remove min(H)
    return min(H)
    }

    has slowly transmongrified itself into this awful slop:
    */
    int heapselect(void *voidbase, const size_t rank, const
    size_t nmemb, const size_t e_size, int (*cmp) (const void *, const
    void *))
    {
    char *base = voidbase; /* Base of the array */
    char *tmp; /* will hold temporary element after
    allocation */
    size_t i, /* array position indicator */
    i_stride, /* array position indicator, adjusted
    for element stride */
    j, /* array position indicator */
    j_stride, /* array position indicator, adjusted
    for element stride */
    left, /* array position indicator */
    mid, /* array position indicator */
    mid_stride, /* array position indicator, adjusted
    for element stride */
    right; /* array position indicator */
    /* swaptype will tell us about the alignment of the data type, for
    opimization of exchange */
    const unsigned swaptype = ((char *) (base) - (char *) 0) %
    sizeof(long) || (e_size) %sizeof(long) ? 2 : (e_size) ==
    sizeof(long) ? 0 : 1;

    if (e_size == 0 || rank > nmemb) {
    return -EDOM;
    }
    if (nmemb <= 1) {
    return 0;
    }
    left = (rank + 1) / 2;
    right = rank - 1;

    if (!(tmp = malloc(e_size))) {
    return -ENOMEM;
    }

    while (0 < left) {
    left--;
    i = left;
    i_stride = i * e_size;
    memcpy(tmp, base + i_stride, e_size);
    while ((j = 2 * i + 1) <= right) {
    j_stride = j * e_size;
    char *current = base + j_stride;
    if (j < right && (*cmp) (current, current + e_size) < 0) {
    j++;
    j_stride += e_size;
    }
    if ((*cmp) (base + j_stride, tmp) < 0) {
    break;
    }
    memcpy(base + i_stride, base + j_stride, e_size);
    i = j;
    i_stride = j_stride;
    }
    if (i != left) {
    memcpy(base + i_stride, tmp, e_size);
    }
    }

    for (mid = right + 1; mid < nmemb; mid++) {
    mid_stride = mid * e_size;
    if ((*cmp) (base + mid_stride, base) < 0) {

    if (swaptype == 0) {
    char *p = base + mid_stride;
    const long t = *(long *) (void *) p;
    *(long *) (void *) p = *(long *) (void *) base;
    *(long *) (void *) base = t;
    } else
    swapfunc(base + mid_stride, base, e_size, swaptype);

    i = 0;
    i_stride = 0;
    memcpy(tmp, base + i_stride, e_size);
    while ((j = 2 * i + 1) <= right) {
    j_stride = j * e_size;
    char *current = base + j_stride;
    if (j < right && (*cmp) (current, current + e_size) <
    0) {
    j++;
    j_stride += e_size;
    }
    if ((*cmp) (base + j_stride, tmp) < 0) {
    break;
    }
    memcpy(base + i_stride, base + j_stride, e_size);
    i = j;
    i_stride = j_stride;
    }
    if (i != 0) {
    memcpy(base + i_stride, tmp, e_size);
    }
    }
    }

    free(tmp);
    return 0;
    }

    #ifdef UNIT_TEST

    #include <time.h>
    #include <stdio.h>

    int arr[100000];

    int compare(const void *aa, const void *bb)
    {
    int a = *(int *) aa;
    int b = *(int *) bb;
    if (a > b) return 1;
    if (a < b) return -1;
    return 0;
    }

    int main(void)
    {
    size_t index;
    size_t rank;
    size_t arrlen = sizeof arr / sizeof arr[0];
    int position;
    srand((unsigned) time(NULL));
    for (index = 0; index < arrlen; index++) {
    arr[index] = rand();
    }
    rank = rand() / (RAND_MAX / arrlen + 1);
    position = heapselect(arr, rank, arrlen, sizeof arr[0], compare);
    if (position < 0)
    printf("Error in heapselect = %d\n", position);
    else
    printf("item (%u) is %d\n", (unsigned) rank, arr[position]);
    qsort(arr, arrlen, sizeof arr[0], compare);
    printf("sorted item (%u) is %d\n", (unsigned) rank, arr[rank]);
    return 0;
    }

    #endif
     
    user923005, Jun 6, 2007
    #1
    1. Advertising

  2. user923005

    user923005 Guest

    Posted to the wrong group, sorry. I meant news:comp.programming but
    forgot where I was.

    On Jun 5, 10:57 pm, user923005 <> wrote:
    > /*
    > Tragically, I have bollixed it up badly. It's such a simple
    > algorithm, but somehow I've gone off, because it does not always
    > work. Any ideas where I went off?
    > */
    >
    > #include <stdlib.h>
    > #include <string.h>
    > #include <errno.h>
    >
    > /*
    > We will swap sizeof(long) bytes at a time if possible (if the data is
    > long aligned),
    > else a char at a time.
    > */
    > static void swapfunc(char *a, char *b, const size_t e_size, const
    > int swaptype)
    > {
    > if (swaptype <= 1) {
    > size_t i = (e_size) / sizeof(long);
    > long *pi = (long *) (void *) a;
    > long *pj = (long *) (void *) b;
    > do {
    > const long t = *pi;
    > *pi++ = *pj;
    > *pj++ = t;
    > } while (--i > 0);
    > } else {
    > size_t i = (e_size);
    > char *pi = (char *) (void *) a;
    > char *pj = (char *) (void *) b;
    > do {
    > const char t = *pi;
    > *pi++ = *pj;
    > *pj++ = t;
    > } while (--i > 0);
    > }}
    >
    > /*
    > Citation:http://www.ics.uci.edu/~eppstein/161/960125.html
    >
    > This beautiful algorithm:
    >
    > heapselect(L,k)
    > {
    > heap H = heapify(L)
    > for (i = 1; i < k; i++) remove min(H)
    > return min(H)
    > }
    >
    > has slowly transmongrified itself into this awful slop:
    > */
    > int heapselect(void *voidbase, const size_t rank, const
    > size_t nmemb, const size_t e_size, int (*cmp) (const void *, const
    > void *))
    > {
    > char *base = voidbase; /* Base of the array */
    > char *tmp; /* will hold temporary element after
    > allocation */
    > size_t i, /* array position indicator */
    > i_stride, /* array position indicator, adjusted
    > for element stride */
    > j, /* array position indicator */
    > j_stride, /* array position indicator, adjusted
    > for element stride */
    > left, /* array position indicator */
    > mid, /* array position indicator */
    > mid_stride, /* array position indicator, adjusted
    > for element stride */
    > right; /* array position indicator */
    > /* swaptype will tell us about the alignment of the data type, for
    > opimization of exchange */
    > const unsigned swaptype = ((char *) (base) - (char *) 0) %
    > sizeof(long) || (e_size) %sizeof(long) ? 2 : (e_size) ==
    > sizeof(long) ? 0 : 1;
    >
    > if (e_size == 0 || rank > nmemb) {
    > return -EDOM;
    > }
    > if (nmemb <= 1) {
    > return 0;
    > }
    > left = (rank + 1) / 2;
    > right = rank - 1;
    >
    > if (!(tmp = malloc(e_size))) {
    > return -ENOMEM;
    > }
    >
    > while (0 < left) {
    > left--;
    > i = left;
    > i_stride = i * e_size;
    > memcpy(tmp, base + i_stride, e_size);
    > while ((j = 2 * i + 1) <= right) {
    > j_stride = j * e_size;
    > char *current = base + j_stride;
    > if (j < right && (*cmp) (current, current + e_size) < 0) {
    > j++;
    > j_stride += e_size;
    > }
    > if ((*cmp) (base + j_stride, tmp) < 0) {
    > break;
    > }
    > memcpy(base + i_stride, base + j_stride, e_size);
    > i = j;
    > i_stride = j_stride;
    > }
    > if (i != left) {
    > memcpy(base + i_stride, tmp, e_size);
    > }
    > }
    >
    > for (mid = right + 1; mid < nmemb; mid++) {
    > mid_stride = mid * e_size;
    > if ((*cmp) (base + mid_stride, base) < 0) {
    >
    > if (swaptype == 0) {
    > char *p = base + mid_stride;
    > const long t = *(long *) (void *) p;
    > *(long *) (void *) p = *(long *) (void *) base;
    > *(long *) (void *) base = t;
    > } else
    > swapfunc(base + mid_stride, base, e_size, swaptype);
    >
    > i = 0;
    > i_stride = 0;
    > memcpy(tmp, base + i_stride, e_size);
    > while ((j = 2 * i + 1) <= right) {
    > j_stride = j * e_size;
    > char *current = base + j_stride;
    > if (j < right && (*cmp) (current, current + e_size) <
    > 0) {
    > j++;
    > j_stride += e_size;
    > }
    > if ((*cmp) (base + j_stride, tmp) < 0) {
    > break;
    > }
    > memcpy(base + i_stride, base + j_stride, e_size);
    > i = j;
    > i_stride = j_stride;
    > }
    > if (i != 0) {
    > memcpy(base + i_stride, tmp, e_size);
    > }
    > }
    > }
    >
    > free(tmp);
    > return 0;
    >
    > }
    >
    > #ifdef UNIT_TEST
    >
    > #include <time.h>
    > #include <stdio.h>
    >
    > int arr[100000];
    >
    > int compare(const void *aa, const void *bb)
    > {
    > int a = *(int *) aa;
    > int b = *(int *) bb;
    > if (a > b) return 1;
    > if (a < b) return -1;
    > return 0;
    >
    > }
    >
    > int main(void)
    > {
    > size_t index;
    > size_t rank;
    > size_t arrlen = sizeof arr / sizeof arr[0];
    > int position;
    > srand((unsigned) time(NULL));
    > for (index = 0; index < arrlen; index++) {
    > arr[index] = rand();
    > }
    > rank = rand() / (RAND_MAX / arrlen + 1);
    > position = heapselect(arr, rank, arrlen, sizeof arr[0], compare);
    > if (position < 0)
    > printf("Error in heapselect = %d\n", position);
    > else
    > printf("item (%u) is %d\n", (unsigned) rank, arr[position]);
    > qsort(arr, arrlen, sizeof arr[0], compare);
    > printf("sorted item (%u) is %d\n", (unsigned) rank, arr[rank]);
    > return 0;
    >
    > }
    >
    > #endif
     
    user923005, Jun 6, 2007
    #2
    1. Advertising

  3. user923005

    user923005 Guest

    On Jun 6, 12:09 am, CBFalconer <> wrote:
    > user923005 wrote:
    >
    > > /*
    > > Tragically, I have bollixed it up badly. It's such a simple
    > > algorithm, but somehow I've gone off, because it does not always
    > > work. Any ideas where I went off?
    > > */

    >
    > ... snip 200 lines of code ...
    >
    > As you say, bollixed. Tear it up and start over. Use void
    > pointers.


    The problem with void pointers is that they have no stride.
    So you can't move data with them.
     
    user923005, Jun 6, 2007
    #3
  4. user923005

    Flash Gordon Guest

    Re: Speaking of selection algorithms, I had never heard of heapselect,so I wrote one.

    user923005 wrote, On 06/06/07 19:16:
    > On Jun 6, 12:09 am, CBFalconer <> wrote:
    >> user923005 wrote:
    >>
    >>> /*
    >>> Tragically, I have bollixed it up badly. It's such a simple
    >>> algorithm, but somehow I've gone off, because it does not always
    >>> work. Any ideas where I went off?
    >>> */

    >> ... snip 200 lines of code ...
    >>
    >> As you say, bollixed. Tear it up and start over. Use void
    >> pointers.

    >
    > The problem with void pointers is that they have no stride.
    > So you can't move data with them.


    Unless you use memcpy/memmove. Of course, you need to know how much data
    to move.
    --
    Flash Gordon
     
    Flash Gordon, Jun 6, 2007
    #4
  5. user923005

    user923005 Guest

    On Jun 6, 2:13 pm, Flash Gordon <> wrote:
    > user923005 wrote, On 06/06/07 19:16:
    >
    > > On Jun 6, 12:09 am, CBFalconer <> wrote:
    > >> user923005 wrote:

    >
    > >>> /*
    > >>> Tragically, I have bollixed it up badly. It's such a simple
    > >>> algorithm, but somehow I've gone off, because it does not always
    > >>> work. Any ideas where I went off?
    > >>> */
    > >> ... snip 200 lines of code ...

    >
    > >> As you say, bollixed. Tear it up and start over. Use void
    > >> pointers.

    >
    > > The problem with void pointers is that they have no stride.
    > > So you can't move data with them.

    >
    > Unless you use memcpy/memmove. Of course, you need to know how much data
    > to move.


    If you look at the user of pointers in my program you will see that
    char pointers were chosen for a reason.
    The user interfaces are void pointers for the reason of generic
    access, but the exchange is optimized for speed.
    Of course, I posted to the wrong newsgroup, and so I expect I won't
    get much utility on the responses here.
     
    user923005, Jun 6, 2007
    #5
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Soren Kuula
    Replies:
    1
    Views:
    457
    Henry S. Thompson
    Dec 1, 2005
  2. Jumbo
    Replies:
    5
    Views:
    362
    Old Wolf
    Jan 23, 2004
  3. Kevin
    Replies:
    4
    Views:
    430
    Irrwahn Grausewitz
    Oct 17, 2003
  4. c
    Replies:
    43
    Views:
    1,548
    Richard
    Dec 15, 2007
  5. McKirahan

    Y2K? Never heard of it...

    McKirahan, Dec 27, 2004, in forum: Javascript
    Replies:
    7
    Views:
    107
    Douglas Crockford
    Dec 30, 2004
Loading...

Share This Page