mast2as
Hi everyone,
I am working on some code that uses colors. Until recently this code represented colors as three floats (RGB format), but it recently changed so that colors are now defined as a spectrum. The size of the vector went from 3 (RGB) to 151 (400 nm to 700 nm, with a sample every 2 nm). The variables use a simple Vector class defined as follows (the full implementation, SuperVector, is included at the end of this post):
template<typename T, int Depth>
class Vector
{ ...
};
Since the move from the RGB version of the code to the spectral version, the application has slowed down significantly. I ran a test comparing the Vector class against plain arrays of 151 floats, performing the same operations 1 million times on each.
#include <ctime>
#include <cstring>   // for memset
#include <iostream>
using namespace std;

int maxIter = static_cast<int>( 1e+6 );
clock_t c0, c1;

c0 = clock();
for ( int i = 0; i < maxIter; ++i ) {
    float real = 1.245f;
    float anotherReal = 20.43492342f;
    float v[ 151 ];
    float v2[ 151 ];
    memset( v, 0, sizeof( float ) * 151 );
    memset( v2, 0, sizeof( float ) * 151 );
    // mixing
    for ( int j = 0; j < 151; ++j ) {
        v[ j ] = v2[ j ] * ( 1.0 - 0.5 ) + v[ j ] * 0.5;
    }
    // scaling by real
    for ( int j = 0; j < 151; ++j ) {
        v[ j ] = v[ j ] * real;
    }
    // scaling by anotherReal
    for ( int j = 0; j < 151; ++j ) {
        v[ j ] = v[ j ] * anotherReal;
    }
    // summing up
    for ( int j = 0; j < 151; ++j ) {
        v[ j ] += v[ j ];
    }
}
c1 = clock();
cerr << "\nfloat[ 151 ]" << endl;
cerr << "end CPU time : " << (long)c1 << endl;
cerr << "elapsed CPU time : " << (float)( c1 - c0 ) / CLOCKS_PER_SEC << endl;

c0 = clock();
for ( int i = 0; i < maxIter; ++i ) {
    float real = 1.245f;
    float anotherReal = 20.43492342f;
    SuperVector<float, 151> v( 12.0f );
    SuperVector<float, 151> v2( -12.0f );
    // mixing
    v = v2 * ( 1.0 - 0.5 ) + v * 0.5;
    // scaling & summing up
    v += SuperVector<float, 151>( 10.0f ) * real * anotherReal;
}
c1 = clock();
cerr << "\nSuperVector class" << endl;
cerr << "end CPU time : " << (long)c1 << endl;
cerr << "elapsed CPU time : " << (float)( c1 - c0 ) / CLOCKS_PER_SEC << endl;
Here are the results:
// RGB version, SuperVector<float, 3>
end CPU time : 390000
elapsed CPU time : 0.39
// spectral version, SuperVector<float, 151>
end CPU time : 10510000
elapsed CPU time : 10.12
// using arrays of 151 floats
end CPU time : 13230000
elapsed CPU time : 2.72
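(In case the numbers look odd: "end CPU time" is the raw clock() value, which accumulates across the three runs, so each elapsed time is the difference between consecutive readings, with CLOCKS_PER_SEC = 1000000 here:)
// spectral version : ( 10510000 - 390000 ) / 1000000 = 10.12 s
// arrays of floats : ( 13230000 - 10510000 ) / 1000000 = 2.72 s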
These results show that the Vector class really slows the application down, especially as the size of the vector increases, and that it is not as efficient as doing the operations on arrays of floats directly: 10.12 s for the class against 2.72 s for the raw arrays, roughly 3.7x slower. So my question is: is there a way of optimising it?
I do realise that, in terms of the work performed, doing:
SuperVector<float, 151> result = SuperVector<float, 151>( 0.1f ) * 0.1f * 100.0f;
is not the same as doing:
float result[ 151 ], temp[ 151 ];
for ( int i = 0; i < 151; ++i ) {
    temp[ i ] = 0.1f;
    result[ i ] = temp[ i ] * 0.1f * 100.0f;
}
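(If I understand my own class correctly, the first version expands into something like the code below: one 151-float temporary plus one full pass over the data per operator call. The names t0, t1 and t2 are just for illustration.)

// roughly what the operator-overloaded expression does behind the scenes:
SuperVector<float, 151> t0( 0.1f );        // constructor: one loop of 151 writes
SuperVector<float, 151> t1 = t0 * 0.1f;    // operator*: a temporary + another loop
SuperVector<float, 151> t2 = t1 * 100.0f;  // operator*: a second temporary + loop
SuperVector<float, 151> result = t2;       // copy of 151 floats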
But isn't there a way I can make the Vector class as efficient as the second option (doing the math operations on arrays of floats directly)? Or, if speed is a priority, is writing C-style code the only way to get it back once the vector size becomes an issue?
Thanks for your help.

For reference, here is the SuperVector class used in the test:
template<typename T, int Size>
class SuperVector
{
public:
    T w[ Size ];
public:
    SuperVector()
    { memset( w, 0, sizeof( T ) * Size ); }
    SuperVector( const T &real )
    {
        for ( int i = 0; i < Size; ++i ) {
            w[ i ] = real;
        }
    }
    // component-wise product
    inline SuperVector<T, Size> operator * ( const SuperVector<T, Size> &v ) const
    {
        SuperVector<T, Size> sv;
        for ( int i = 0; i < Size; ++i ) {
            sv.w[ i ] = w[ i ] * v.w[ i ];
        }
        return sv;
    }
    // scaling by a scalar
    inline SuperVector<T, Size> operator * ( const T &real ) const
    {
        SuperVector<T, Size> sv;
        for ( int i = 0; i < Size; ++i ) {
            sv.w[ i ] = w[ i ] * real;
        }
        return sv;
    }
    // component-wise sum
    inline SuperVector<T, Size> operator + ( const SuperVector<T, Size> &v ) const
    {
        SuperVector<T, Size> sv;
        for ( int i = 0; i < Size; ++i ) {
            sv.w[ i ] = w[ i ] + v.w[ i ];
        }
        return sv;
    }
    // in-place component-wise sum, no temporary needed
    inline SuperVector<T, Size>& operator += ( const SuperVector<T, Size> &v )
    {
        for ( int i = 0; i < Size; ++i ) {
            w[ i ] += v.w[ i ];
        }
        return *this;
    }
};
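
PS: one idea I had, though I have not profiled it yet, is to add compound-assignment operators to the class and rewrite the hot spots with them, so that no 151-float temporaries are created at all. Just a sketch of what I mean:

// hypothetical in-place scale, to be added to SuperVector:
inline SuperVector<T, Size>& operator *= ( const T &real )
{
    for ( int i = 0; i < Size; ++i ) {
        w[ i ] *= real;
    }
    return *this;
}

At the call site, v = v2 * ( 1.0 - 0.5 ) + v * 0.5 could then become:

v *= 0.5f;                          // scale v in place, no temporary
for ( int j = 0; j < 151; ++j ) {
    v.w[ j ] += v2.w[ j ] * 0.5f;   // mix in v2 without a temporary
}

and since every component of SuperVector<float, 151>( 10.0f ) * real * anotherReal has the same value, the last statement reduces to:

const float k = 10.0f * real * anotherReal;
for ( int j = 0; j < 151; ++j ) {
    v.w[ j ] += k;                  // add the constant directly
}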