F
franky.backeljauw
Hello,
I am wondering which of these two methods is faster: std::copy, which
is included in the standard library, or a manually written pointer copy?
Do any of you have any experience with this?
I would think that the library function std::copy would perform optimally,
as it is a library function, and therefore the writers of this function
would know best how to optimize it ... but some tests seem to indicate
that my pointer copy function can do a lot better, especially when its
loop is vectorized. I have used g++ and icc with -O2, and icc with -O2
-march=pentiumiii -xK -tpp6 as compiler options.
Or is there another way to look at this? For completeness, I include my
test program:
<code>
#include <ctime>
#include <algorithm>
#include <iostream>
using namespace std;
typedef double Element;

/**
 * Small owning wrapper around a heap-allocated array of Element.
 *
 * The members are public on purpose: the copy helpers below work on the raw
 * `array` pointer directly.
 */
class Vector
{
public:
    /** Allocates `newsize` elements, all value-initialised to 0. */
    Vector( unsigned int newsize )
        : size( newsize ), array( new Element[ newsize ]() ) {}

    /** Releases the storage; memory from new[] must be freed with delete[]. */
    ~Vector() { delete[] array; array = 0; }

    /** Sets element i to the value i for every index. */
    void init()
    {
        for ( unsigned int i = 0; i < size; i++ )
            array[ i ] = i;
    }

    /** Writes all elements to std::cout, each followed by a space, then ends the line. */
    void print() const
    {
        for ( unsigned int i = 0; i < size; i++ )
            std::cout << array[ i ] << " ";
        std::cout << std::endl;
    }

    unsigned int size;
    Element *array;

private:
    // The class owns `array`; a member-wise copy would free it twice.
    // Declared but not defined so that copying fails to build.
    Vector( const Vector & );
    Vector &operator=( const Vector & );
};

/**
 * Hand-written pointer copy: copies the n + 1 elements src[0..n] to dst[0..n].
 * Copies nothing when n < 0.
 *
 * This is a namespace-scope function rather than an in-class friend: a friend
 * defined only inside Vector can be found by argument-dependent lookup alone,
 * and plain Element pointers do not make Vector an associated class.
 */
inline void iCopy( Element *dst, const Element *src, int n )
{
    int remaining = n + 1;
    while ( remaining-- > 0 )
        *dst++ = *src++;
}

/**
 * std::copy based copy with the same contract as iCopy: copies the n + 1
 * elements src[0..n] to dst[0..n], and nothing when n < 0.
 */
inline void sCopy( Element *dst, const Element *src, int n )
{
    if ( n < 0 )
        return;  // avoid forming an invalid range [src, src + n + 1)
    std::copy( src, src + n + 1, dst );
}
/**
 * Converts a clock() interval to whole milliseconds.
 *
 * Scaling by CLOCKS_PER_SEC keeps the unit the same on every platform;
 * dividing the raw tick count by 1000 is only milliseconds when
 * CLOCKS_PER_SEC happens to be 1000000.
 */
static long elapsedMillis( std::clock_t start, std::clock_t stop )
{
    return static_cast<long>( ( stop - start ) * 1000.0 / CLOCKS_PER_SEC );
}

/**
 * Prints a small copy demonstration to std::cout, then times iCopy against
 * sCopy on large arrays and reports the CPU time of each to std::cerr.
 */
int main()
{
    // Demonstration: copy the first 8 elements (indices 0..7) of a into b and c.
    Vector a( 10 ), b( 10 ), c( 10 );
    a.init();
    std::cout << "a : "; a.print();
    std::cout << "b : "; b.print();
    std::cout << "c : "; c.print();
    iCopy( b.array, a.array, 7 );
    std::cout << "b after iCopy : "; b.print();
    sCopy( c.array, a.array, 7 );
    std::cout << "c after sCopy : "; c.print();

    const int VECTOR_SIZE = 5000000;
    const int LAST_INDEX = VECTOR_SIZE - 1;  // both helpers copy indices 0..n
    const int REPEATS = 10;

    Vector d( VECTOR_SIZE ), e( VECTOR_SIZE );
    d.init();

    // Two rounds of iCopy then sCopy. Both now copy exactly the same number
    // of elements so the timings are directly comparable.
    // NOTE(review): the second round presumably exists to separate warm-up
    // effects from the steady state; confirm with the author.
    for ( int round = 0; round < 2; ++round )
    {
        std::clock_t start = std::clock();
        for ( int i = 0; i < REPEATS; i++ )
            iCopy( e.array, d.array, LAST_INDEX );
        std::clock_t stop = std::clock();
        std::cerr << "time for iCopy : " << elapsedMillis( start, stop ) << std::endl;

        start = std::clock();
        for ( int i = 0; i < REPEATS; i++ )
            sCopy( e.array, d.array, LAST_INDEX );
        stop = std::clock();
        std::cerr << "time for sCopy : " << elapsedMillis( start, stop ) << std::endl;
    }
    return 0;
}
</code>
Thanks for any reply.
Regards,
Franky B.
I am wondering which of these two methods is faster: std::copy, which
is included in the standard library, or a manually written pointer copy?
Do any of you have any experience with this?
I would think that the library function std::copy would perform optimally,
as it is a library function, and therefore the writers of this function
would know best how to optimize it ... but some tests seem to indicate
that my pointer copy function can do a lot better, especially when its
loop is vectorized. I have used g++ and icc with -O2, and icc with -O2
-march=pentiumiii -xK -tpp6 as compiler options.
Or is there another way to look at this? For completeness, I include my
test program:
<code>
#include <ctime>
#include <algorithm>
#include <iostream>
using namespace std;
typedef double Element;

/**
 * Small owning wrapper around a heap-allocated array of Element.
 *
 * The members are public on purpose: the copy helpers below work on the raw
 * `array` pointer directly.
 */
class Vector
{
public:
    /** Allocates `newsize` elements, all value-initialised to 0. */
    Vector( unsigned int newsize )
        : size( newsize ), array( new Element[ newsize ]() ) {}

    /** Releases the storage; memory from new[] must be freed with delete[]. */
    ~Vector() { delete[] array; array = 0; }

    /** Sets element i to the value i for every index. */
    void init()
    {
        for ( unsigned int i = 0; i < size; i++ )
            array[ i ] = i;
    }

    /** Writes all elements to std::cout, each followed by a space, then ends the line. */
    void print() const
    {
        for ( unsigned int i = 0; i < size; i++ )
            std::cout << array[ i ] << " ";
        std::cout << std::endl;
    }

    unsigned int size;
    Element *array;

private:
    // The class owns `array`; a member-wise copy would free it twice.
    // Declared but not defined so that copying fails to build.
    Vector( const Vector & );
    Vector &operator=( const Vector & );
};

/**
 * Hand-written pointer copy: copies the n + 1 elements src[0..n] to dst[0..n].
 * Copies nothing when n < 0.
 *
 * This is a namespace-scope function rather than an in-class friend: a friend
 * defined only inside Vector can be found by argument-dependent lookup alone,
 * and plain Element pointers do not make Vector an associated class.
 */
inline void iCopy( Element *dst, const Element *src, int n )
{
    int remaining = n + 1;
    while ( remaining-- > 0 )
        *dst++ = *src++;
}

/**
 * std::copy based copy with the same contract as iCopy: copies the n + 1
 * elements src[0..n] to dst[0..n], and nothing when n < 0.
 */
inline void sCopy( Element *dst, const Element *src, int n )
{
    if ( n < 0 )
        return;  // avoid forming an invalid range [src, src + n + 1)
    std::copy( src, src + n + 1, dst );
}
/**
 * Converts a clock() interval to whole milliseconds.
 *
 * Scaling by CLOCKS_PER_SEC keeps the unit the same on every platform;
 * dividing the raw tick count by 1000 is only milliseconds when
 * CLOCKS_PER_SEC happens to be 1000000.
 */
static long elapsedMillis( std::clock_t start, std::clock_t stop )
{
    return static_cast<long>( ( stop - start ) * 1000.0 / CLOCKS_PER_SEC );
}

/**
 * Prints a small copy demonstration to std::cout, then times iCopy against
 * sCopy on large arrays and reports the CPU time of each to std::cerr.
 */
int main()
{
    // Demonstration: copy the first 8 elements (indices 0..7) of a into b and c.
    Vector a( 10 ), b( 10 ), c( 10 );
    a.init();
    std::cout << "a : "; a.print();
    std::cout << "b : "; b.print();
    std::cout << "c : "; c.print();
    iCopy( b.array, a.array, 7 );
    std::cout << "b after iCopy : "; b.print();
    sCopy( c.array, a.array, 7 );
    std::cout << "c after sCopy : "; c.print();

    const int VECTOR_SIZE = 5000000;
    const int LAST_INDEX = VECTOR_SIZE - 1;  // both helpers copy indices 0..n
    const int REPEATS = 10;

    Vector d( VECTOR_SIZE ), e( VECTOR_SIZE );
    d.init();

    // Two rounds of iCopy then sCopy. Both now copy exactly the same number
    // of elements so the timings are directly comparable.
    // NOTE(review): the second round presumably exists to separate warm-up
    // effects from the steady state; confirm with the author.
    for ( int round = 0; round < 2; ++round )
    {
        std::clock_t start = std::clock();
        for ( int i = 0; i < REPEATS; i++ )
            iCopy( e.array, d.array, LAST_INDEX );
        std::clock_t stop = std::clock();
        std::cerr << "time for iCopy : " << elapsedMillis( start, stop ) << std::endl;

        start = std::clock();
        for ( int i = 0; i < REPEATS; i++ )
            sCopy( e.array, d.array, LAST_INDEX );
        stop = std::clock();
        std::cerr << "time for sCopy : " << elapsedMillis( start, stop ) << std::endl;
    }
    return 0;
}
</code>
Thanks for any reply.
Regards,
Franky B.