G
Gerald Breuer
I've got a simple straightforward mergesort implementation and the
function mergesort calculates the size for the external memory in a
loop with the result sbSize; the size is calculated in the loop from
the lines with the comments "here" and "to here".
template<typename T, typename Pred>
void mergesort_rc( T *ptElements, T *ptBack, size_t elements,
Pred &pred, T *ptSortBuffer );
template<typename T, typename Pred>
void mergesort( T *ptElements, size_t elements, Pred &pred )
{
if( elements > 2 )
{
size_t sbSize,
sbsAdd;
T *ptSortBuffer;
sbSize = 0; // here
sbsAdd = elements;
do
sbSize += sbsAdd,
sbsAdd = sbsAdd - sbsAdd / 2;
while( sbsAdd > 2 ); // to here
ptSortBuffer = new T[sbSize];
mergesort_rc( ptElements, ptElements, elements, pred,
ptSortBuffer );
delete[] ptSortBuffer;
}
else
if( elements == 2 )
swap( ptElements[0], ptElements[1] );
}
template<typename T, typename Pred>
void mergesort_rc( T *ptElements, T *ptBack, size_t elements, Pred
&pred, T *ptSortBuffer )
{
if( elements > 2 )
{
size_t leftElements,
rightElements;
T *ptLeft,
*ptRight;
T *ptNextSortBuffer;
leftElements = elements / 2;
rightElements = elements - leftElements;
ptLeft = ptSortBuffer;
ptRight = ptSortBuffer + leftElements;
ptNextSortBuffer = ptRight + rightElements;
if( leftElements > 1 )
mergesort_rc( ptElements, ptLeft, leftElements, pred,
ptNextSortBuffer );
else
*ptLeft = *ptElements;
mergesort_rc( ptElements + leftElements, ptRight, rightElements,
pred, ptNextSortBuffer );
for( T *ptLeftEnd = ptRight,
*ptRightEnd = ptNextSortBuffer; ; )
if( pred( *ptLeft, *ptRight ) )
{
*ptBack++ = *ptLeft;
if( ++ptLeft == ptLeftEnd )
return (void)copy_objects( ptRight, ptRightEnd - ptRight,
ptBack );
}
else
{
*ptBack++ = *ptRight;
if( ++ptRight == ptRightEnd )
return (void)copy_objects( ptLeft, ptLeftEnd - ptLeft,
ptBack );
}
}
else
if( pred( ptElements[0], ptElements[1] ) )
ptBack[0] = ptElements[0],
ptBack[1] = ptElements[1];
else
ptBack[0] = ptElements[1],
ptBack[1] = ptElements[0];
}
So I tried to find out if there is a general formula to calculate the
size faster. I found that the resulting sbSize is alwas around 2 * ele-
ments and I thought that if I would add a safe offset to this 2 * ele-
ments I things would go right. So I looked what's the maximum offset
necessary while iterating over a certain input-size.
#include <stdio.h>
#include <limits.h>
template<typename UINT_T> // outputs highest set bit
unsigned hsb( UINT_T ui )
{
UINT_T mask;
unsigned test, bits;
mask = (UINT_T)-1 << (sizeof(UINT_T) * CHAR_BIT / 2);
test = sizeof(UINT_T) * CHAR_BIT / 2;
bits = 0;
do
{
if( ui & mask )
ui >>= test,
bits += test;
test = test / 2;
mask >>= test;
} while( test != 0 );
return bits;
}
int main()
{
int m = 0;
for( unsigned e = 1; e <= 0x80000001u; e++ )
{
unsigned s, a;
int d;
s = 0;
a = e;
do
s += a;
while( (a = a - a / 2) > 2 );
if( (d = (int)(s - 2 * e)) > m )
{
m = d;
printf( "max (% 8X): %d, %u\n", e, m, hsb( e ) );
}
}
return 0;
}
The program outputs the following:
max ( 21): 1, 5
max ( 41): 2, 6
max ( 81): 3, 7
max ( 101): 4, 8
max ( 201): 5, 9
max ( 401): 6, 10
max ( 801): 7, 11
max ( 1001): 8, 12
max ( 2001): 9, 13
max ( 4001): 10, 14
max ( 8001): 11, 15
max ( 10001): 12, 16
max ( 20001): 13, 17
max ( 40001): 14, 18
max ( 80001): 15, 19
max ( 100001): 16, 20
max ( 200001): 17, 21
max ( 400001): 18, 22
max ( 800001): 19, 23
max ( 1000001): 20, 24
max ( 2000001): 21, 25
max ( 4000001): 22, 26
max ( 8000001): 23, 27
max (10000001): 24, 28
max (20000001): 25, 29
max (40000001): 26, 30
max (80000001): 27, 31
So when the size is <= 2^5 I'm fine with a sort-buffer 2*e elements.
From 2^5+1 to 2^6 I I'm fine with a sort-buffer of 2*e+1 and so on.
What's the matematical reason behind that or is there a more simple
formula without any linear or logarithmical (counting the bits) ap-
proach to calculate the sort-buffer-size?
function mergesort calculates the size for the external memory in a
loop with the result sbSize; the size is calculated in the loop from
the lines with the comments "here" and "to here".
template<typename T, typename Pred>
void mergesort_rc( T *ptElements, T *ptBack, size_t elements,
Pred &pred, T *ptSortBuffer );
template<typename T, typename Pred>
void mergesort( T *ptElements, size_t elements, Pred &pred )
{
if( elements > 2 )
{
size_t sbSize,
sbsAdd;
T *ptSortBuffer;
sbSize = 0; // here
sbsAdd = elements;
do
sbSize += sbsAdd,
sbsAdd = sbsAdd - sbsAdd / 2;
while( sbsAdd > 2 ); // to here
ptSortBuffer = new T[sbSize];
mergesort_rc( ptElements, ptElements, elements, pred,
ptSortBuffer );
delete[] ptSortBuffer;
}
else
if( elements == 2 )
swap( ptElements[0], ptElements[1] );
}
template<typename T, typename Pred>
void mergesort_rc( T *ptElements, T *ptBack, size_t elements, Pred
&pred, T *ptSortBuffer )
{
if( elements > 2 )
{
size_t leftElements,
rightElements;
T *ptLeft,
*ptRight;
T *ptNextSortBuffer;
leftElements = elements / 2;
rightElements = elements - leftElements;
ptLeft = ptSortBuffer;
ptRight = ptSortBuffer + leftElements;
ptNextSortBuffer = ptRight + rightElements;
if( leftElements > 1 )
mergesort_rc( ptElements, ptLeft, leftElements, pred,
ptNextSortBuffer );
else
*ptLeft = *ptElements;
mergesort_rc( ptElements + leftElements, ptRight, rightElements,
pred, ptNextSortBuffer );
for( T *ptLeftEnd = ptRight,
*ptRightEnd = ptNextSortBuffer; ; )
if( pred( *ptLeft, *ptRight ) )
{
*ptBack++ = *ptLeft;
if( ++ptLeft == ptLeftEnd )
return (void)copy_objects( ptRight, ptRightEnd - ptRight,
ptBack );
}
else
{
*ptBack++ = *ptRight;
if( ++ptRight == ptRightEnd )
return (void)copy_objects( ptLeft, ptLeftEnd - ptLeft,
ptBack );
}
}
else
if( pred( ptElements[0], ptElements[1] ) )
ptBack[0] = ptElements[0],
ptBack[1] = ptElements[1];
else
ptBack[0] = ptElements[1],
ptBack[1] = ptElements[0];
}
So I tried to find out if there is a general formula to calculate the
size faster. I found that the resulting sbSize is alwas around 2 * ele-
ments and I thought that if I would add a safe offset to this 2 * ele-
ments I things would go right. So I looked what's the maximum offset
necessary while iterating over a certain input-size.
#include <stdio.h>
#include <limits.h>
template<typename UINT_T> // outputs highest set bit
unsigned hsb( UINT_T ui )
{
UINT_T mask;
unsigned test, bits;
mask = (UINT_T)-1 << (sizeof(UINT_T) * CHAR_BIT / 2);
test = sizeof(UINT_T) * CHAR_BIT / 2;
bits = 0;
do
{
if( ui & mask )
ui >>= test,
bits += test;
test = test / 2;
mask >>= test;
} while( test != 0 );
return bits;
}
int main()
{
int m = 0;
for( unsigned e = 1; e <= 0x80000001u; e++ )
{
unsigned s, a;
int d;
s = 0;
a = e;
do
s += a;
while( (a = a - a / 2) > 2 );
if( (d = (int)(s - 2 * e)) > m )
{
m = d;
printf( "max (% 8X): %d, %u\n", e, m, hsb( e ) );
}
}
return 0;
}
The program outputs the following:
max ( 21): 1, 5
max ( 41): 2, 6
max ( 81): 3, 7
max ( 101): 4, 8
max ( 201): 5, 9
max ( 401): 6, 10
max ( 801): 7, 11
max ( 1001): 8, 12
max ( 2001): 9, 13
max ( 4001): 10, 14
max ( 8001): 11, 15
max ( 10001): 12, 16
max ( 20001): 13, 17
max ( 40001): 14, 18
max ( 80001): 15, 19
max ( 100001): 16, 20
max ( 200001): 17, 21
max ( 400001): 18, 22
max ( 800001): 19, 23
max ( 1000001): 20, 24
max ( 2000001): 21, 25
max ( 4000001): 22, 26
max ( 8000001): 23, 27
max (10000001): 24, 28
max (20000001): 25, 29
max (40000001): 26, 30
max (80000001): 27, 31
So when the size is <= 2^5 I'm fine with a sort-buffer 2*e elements.
From 2^5+1 to 2^6 I I'm fine with a sort-buffer of 2*e+1 and so on.
What's the matematical reason behind that or is there a more simple
formula without any linear or logarithmical (counting the bits) ap-
proach to calculate the sort-buffer-size?