I
ikarus
Hello C++ Gurus!
I'm comparing sorting algorithm for study goals. I've compared STL
std::sort and hand-coded introsort on millions (tens of millions) of
integers array sorting. It was tested on random, sorted, and sorted in
reverse arrays. In all tests on such a huge arrays std::sort is faster
by ~1/4 then my version. So it seems that it some call optimization.
My 'hand coded' version is based on D. Musser's suggestions and on
some STL header which I've found in diffrent STL installations (GCC,
and MSVS2008). The maximum optimization flags was used in both cases,
for g++ and cl. But 'hand-coded' version is still slower.
Why? May be in production std::sort version it is used some SIMD
optimization internally or assembler optiomization?
Thanks in advance.
Alex.
Pseudocode (C++, may be bugs) of introsort version:
// CODE
template <typename T>
inline void swap(T *a, T *b)
{
T temp = *a;
*a = *b;
*b = temp;
}
template <typename T>
inline void cmpswap(T *a, T *b)
{
if (*a > *b) {
T temp = *a;
*a = *b;
*b = temp;
}
}
template <typename T>
inline T *partition(T *first, T *last)
{
T *v = last-1;
first--;
do {
while(*(++first) > *v);
while(*last-- < *v) if (first == last) break;
if (first >= last) break;
swap(first,last);
} while(true);
swap(v,first);
return ++first;
}
template <typename T>
inline void median3(T *first, T *last)
{
T *r = last-1, *rr = last-2;
swap(first + (last - first)/2 , rr);
cmpswap(first, rr);
cmpswap(first, r);
cmpswap(rr, r);
}
template <typename T>
inline void _introsort(T *first, T *last, int recurs_depth)
{
while(last - first > M) {
if (recurs_depth--) return hsort(first, last);
median3(first, last);
T *k = partition(first+1, last-1);
introsort(k,last, recurs_depth);
last = k;
}
}
template <typename T>
void introsort(T *first, T *last, int recurs_depth)
{
_introsort(first, last, recurs_depth);
insertion(first, last);
}
// CODE END
I'm comparing sorting algorithm for study goals. I've compared STL
std::sort and hand-coded introsort on millions (tens of millions) of
integers array sorting. It was tested on random, sorted, and sorted in
reverse arrays. In all tests on such a huge arrays std::sort is faster
by ~1/4 then my version. So it seems that it some call optimization.
My 'hand coded' version is based on D. Musser's suggestions and on
some STL header which I've found in diffrent STL installations (GCC,
and MSVS2008). The maximum optimization flags was used in both cases,
for g++ and cl. But 'hand-coded' version is still slower.
Why? May be in production std::sort version it is used some SIMD
optimization internally or assembler optiomization?
Thanks in advance.
Alex.
Pseudocode (C++, may be bugs) of introsort version:
// CODE
template <typename T>
inline void swap(T *a, T *b)
{
T temp = *a;
*a = *b;
*b = temp;
}
template <typename T>
inline void cmpswap(T *a, T *b)
{
if (*a > *b) {
T temp = *a;
*a = *b;
*b = temp;
}
}
template <typename T>
inline T *partition(T *first, T *last)
{
T *v = last-1;
first--;
do {
while(*(++first) > *v);
while(*last-- < *v) if (first == last) break;
if (first >= last) break;
swap(first,last);
} while(true);
swap(v,first);
return ++first;
}
template <typename T>
inline void median3(T *first, T *last)
{
T *r = last-1, *rr = last-2;
swap(first + (last - first)/2 , rr);
cmpswap(first, rr);
cmpswap(first, r);
cmpswap(rr, r);
}
template <typename T>
inline void _introsort(T *first, T *last, int recurs_depth)
{
while(last - first > M) {
if (recurs_depth--) return hsort(first, last);
median3(first, last);
T *k = partition(first+1, last-1);
introsort(k,last, recurs_depth);
last = k;
}
}
template <typename T>
void introsort(T *first, T *last, int recurs_depth)
{
_introsort(first, last, recurs_depth);
insertion(first, last);
}
// CODE END