Hi everyone,
Below is some test code I wrote after noticing a big difference in running time between std::min / std::max and a templated version of them. I'm not sure where the difference comes from, so any comments are welcome.
The code does nothing useful; it is a simplification of the real code (the purpose is only to show the difference in timings).
I don't know if there is a proper way to post a chunk of code, so I'll just drop it here...
#include <algorithm>
#include <iostream>
#include <sys/time.h>

typedef unsigned char uchar;
typedef unsigned int uint;

using namespace std;

// Version using std::min / std::max.
void fn(const uchar *pL, const uchar *pR, int w, uint *pD) {
  int vlm, vlp;
  for (int i = 0; i < w; ++i) {
    // Averages of the current element of pL with its left and right neighbours.
    vlm = (i > 0     ? (pL[i-1] + pL[i]) / 2 : pL[i]);
    vlp = (i < w - 1 ? (pL[i] + pL[i+1]) / 2 : pL[i]);
    pD[i] = std::min<int>(vlp, std::max<int>(vlm, pR[i]));
  }
}

template <class T> T min2(const T &a, const T &b) { return a < b ? a : b; }
template <class T> T max2(const T &a, const T &b) { return a < b ? a : b; }

// Same loop, but using the templated min2 / max2 above.
void fn2(const uchar *pL, const uchar *pR, int w, uint *pD) {
  int vlm, vlp;
  for (int i = 0; i < w; ++i) {
    vlm = (i > 0     ? (pL[i-1] + pL[i]) / 2 : pL[i]);
    vlp = (i < w - 1 ? (pL[i] + pL[i+1]) / 2 : pL[i]);
    pD[i] = min2<int>(vlp, max2<int>(vlm, pR[i]));
  }
}

int main(int argc, char *argv[]) {
  int h = 400, w = 500;
  uchar *T1[h], *T2[h];
  uint *T3[h];
  for (int i = 0; i < h; ++i) {
    T1[i] = new uchar[w];
    T2[i] = new uchar[w];
    T3[i] = new uint[w];
  }
  // Fill the two input arrays with random bytes.
  for (int j = 0; j < h; ++j) {
    for (int i = 0; i < w; ++i) {
      T1[j][i] = rand() % 256;
      T2[j][i] = rand() % 256;
    }
  }
  struct timeval t1, t2;

  // Time 1000 passes of the std::min / std::max version.
  gettimeofday(&t1, NULL);
  for (int z = 0; z < 1000; ++z) {
    for (int j = 0; j < h; ++j) {
      uchar *p1 = T1[j];
      uchar *p2 = T2[j];
      uint *p3 = T3[j];
      fn(p1, p2, w, p3);
    }
  }
  gettimeofday(&t2, NULL);
  cout << "Took " << (t2.tv_sec - t1.tv_sec) + (t2.tv_usec - t1.tv_usec) / 1.e6 << endl;

  // Time 1000 passes of the templated version.
  gettimeofday(&t1, NULL);
  for (int z = 0; z < 1000; ++z) {
    for (int j = 0; j < h; ++j) {
      uchar *p1 = T1[j];
      uchar *p2 = T2[j];
      uint *p3 = T3[j];
      fn2(p1, p2, w, p3);
    }
  }
  gettimeofday(&t2, NULL);
  cout << "Took " << (t2.tv_sec - t1.tv_sec) + (t2.tv_usec - t1.tv_usec) / 1.e6 << endl;
}
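(The timing itself is nothing fancy, just gettimeofday before and after each loop. If it helps readability, the repeated subtraction could be pulled into a tiny helper like the sketch below; elapsed_seconds is just a name I made up for this post, it is not used in the test above and assumes the same <sys/time.h> include.)

// Hypothetical helper, not part of the test code above: seconds elapsed
// between two gettimeofday() samples.
static double elapsed_seconds(const struct timeval &t1, const struct timeval &t2) {
  return (t2.tv_sec - t1.tv_sec) + (t2.tv_usec - t1.tv_usec) / 1.e6;
}

With it, the two prints would simply become cout << "Took " << elapsed_seconds(t1, t2) << endl;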
Then I compiled it with:
g++ test.cpp -O3 -o test
And these are the results:
Took 1.58356
Took 0.789235
So the second version, which uses the templated functions, is roughly twice as fast. Is that normal?
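One difference I can think of is the return type: if I remember correctly, std::min and std::max are declared roughly like this in <algorithm> (a simplified sketch from memory, not the actual library source), returning const references, whereas my min2/max2 above return by value:

// Rough sketch of the standard signatures (simplified, from memory):
template <class T> const T& min(const T &a, const T &b) { return (b < a) ? b : a; }
template <class T> const T& max(const T &a, const T &b) { return (a < b) ? b : a; }

Maybe that matters for the <int> instantiations fed with uchar arguments, but I am only guessing.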
I tested it on Mac OS X (10.5) (the timings shown above) and on Linux, where the timings are 5.45 for the first version and 4.71 for the second (that machine is older, hence the larger numbers, but the second version is still faster).
Can anyone reproduce this and tell me whether it is normal?
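(If someone prefers to look at the generated code rather than the timings, I believe dumping the assembly for both versions with something like
g++ -O3 -S test.cpp -o test.s
should be enough to compare them side by side, but I have not done that myself yet.)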
Thanks