I wrote a more comprehensive test of various abstraction penalties in C++.
Here's what I get on P4 with GCC-3.3.4 -O3 (aggressive optimization &
inlining):
$ time ./a.out
f_int(1000) : 9.240s
f_class1(1000) : 13.890s
f_class2(1000) : 19.510s
f_method(1000) : 13.850s
f_macro(1000) : 9.320s
f<int>(1000) : 9.240s
f<c>(1000) : 19.490s
f_get1(1000) : 13.850s
f_get2(1000) : 32.660s
real 2m21.092s
user 2m20.928s
sys 0m0.133s
Lessons learned:
* regular accessors (getters & setters) didn't help
* very minor things can confuse the optimizer (class1 vs class2)
I'd be curious to know how other CPUs/compilers do. Program text follows.
#include <iostream>
#include <ctime>
#include <iomanip>
using namespace std;
// Returns the value of clock() converted to seconds (clock ticks divided by
// CLOCKS_PER_SEC). Used as the timestamp source by the timing macro.
double time() {
    const double ticks = static_cast<double>(clock());
    return ticks / CLOCKS_PER_SEC;
}
// TIME_INC(e, res): evaluates the expression `e` exactly once, adds its value
// to `res`, and prints one line of the form "<text of e> : <seconds>s", where
// <seconds> is the difference between the time() readings taken immediately
// before and after the evaluation (fixed notation, 3 decimals).
// The expansion is a brace-enclosed block, so the macro may be followed by a
// semicolon or not. It relies on `time`, `cout` and the <iomanip> manipulators
// being visible unqualified at the point of use.
#define TIME_INC(e, res) { \
    const double started = time(); \
    (res) += (e); \
    const double finished = time(); \
    const double elapsed = finished - started; \
    cout << setw(15) << #e << " : "; \
    cout << setw(7) << fixed << setprecision(3) \
         << elapsed << "s" << endl; \
}
// Minimal wrapper around a single int. The benchmark uses it in place of a
// plain int to compare several ways of reaching the wrapped value: implicit
// conversion, reference accessors, direct member access, and get()/set().
struct c {
    // The wrapped value; public so it can also be accessed directly.
    int x;

    // Default construction leaves x uninitialized, like a bare `int` local.
    c() {}
    // Converting constructor; non-explicit, so an int can be assigned or
    // passed wherever a c is expected.
    c(int value) : x(value) {}

    // By-value getter and setter.
    int get() const { return x; }
    void set(int value) { x = value; }

    // Accessors returning a reference to the wrapped value.
    int& i() { return x; }
    const int& i() const { return x; }

    // Implicit conversions to a reference to the wrapped value, which let a
    // c take part in built-in int expressions.
    operator int& () { return x; }
    operator const int& () const { return x; }
};
// LOOP1(i, n): the header of a for loop that assigns -n to i and then
// increments i while i <= n, i.e. i takes every value from -n to n inclusive.
#define LOOP1(i, n) for((i) = -(n); (i) <= (n); ++(i))
// LOOP(i, j, k, n, a): a complete function body shared by the benchmark
// variants. It sets `a` to 0, runs three nested LOOP1 loops over i, j and k,
// adds k + j to `a` on every innermost iteration (i only repeats the work),
// and finally RETURNS `a` from the enclosing function. The caller supplies
// the terminating semicolon.
// Every parameter use is parenthesized, as in LOOP1, so that arguments
// containing low-precedence operators (e.g. `p ? x : y`) expand correctly.
#define LOOP(i, j, k, n, a) \
    (a) = 0; \
    LOOP1(i, n) LOOP1(j, n) LOOP1(k, n) \
        (a) += (k) + (j); \
    return a
// Baseline variant: the shared LOOP body with plain int counters and an int
// accumulator. LOOP supplies the return statement.
int f_int(int n) {
    int outer;
    int mid;
    int inner;
    int total;
    LOOP(outer, mid, inner, n, total);
}
// The shared LOOP body with counters and accumulator of type c, used through
// c's implicit conversions. The accumulator is converted to int on return.
int f_class1(c n) {
    c outer;
    c mid;
    c inner;
    c total;
    LOOP(outer, mid, inner, n, total);
}
// Identical to f_class1 except for the return type: the accumulator is
// returned as a c instead of being converted to int.
c f_class2(c n) {
    c outer;
    c mid;
    c inner;
    c total;
    LOOP(outer, mid, inner, n, total);
}
// The shared LOOP body with c objects, where every operand is reached through
// the reference-returning accessor c::i() rather than an implicit conversion.
int f_method(c n) {
    c outer;
    c mid;
    c inner;
    c total;
    LOOP(outer.i(), mid.i(), inner.i(), n.i(), total.i());
}
// I(e): direct access to the public member x of the c object `e`.
#define I(e) (e).x
// Very similar to f_method, but each operand is the member x itself (via I)
// instead of the result of c::i(). Measured about 1.5x faster than f_method.
int f_macro(c n) {
    c outer;
    c mid;
    c inner;
    c total;
    LOOP(I(outer), I(mid), I(inner), I(n), I(total));
}
// Generic variant: the shared LOOP body with counters, accumulator, parameter
// and return value all of type T. The benchmark instantiates it as f<int>
// and f<c>.
template<class T>
T f(T n) {
    T outer;
    T mid;
    T inner;
    T total;
    LOOP(outer, mid, inner, n, total);
}
// The triple loop written out by hand so that every read of a counter goes
// through c::get() and every write through c::set(). Each counter runs from
// -n to n inclusive, and the innermost body adds the two inner counters to
// the accumulator. The accumulator is converted to int on return.
int f_get1(c n) {
    c outer;
    c mid;
    c inner;
    c total = 0;
    for (outer.set(-n.get()); outer.get() <= n.get(); outer.set(outer.get() + 1)) {
        for (mid.set(-n.get()); mid.get() <= n.get(); mid.set(mid.get() + 1)) {
            for (inner.set(-n.get()); inner.get() <= n.get(); inner.set(inner.get() + 1)) {
                total.set(total.get() + inner.get() + mid.get());
            }
        }
    }
    return total;
}
// Same get()/set() triple loop as f_get1, but the accumulator is returned as
// a c rather than an int. In the author's measurements the return type makes
// a big difference.
c f_get2(c n) {
    c outer;
    c mid;
    c inner;
    c total = 0;
    for (outer.set(-n.get()); outer.get() <= n.get(); outer.set(outer.get() + 1)) {
        for (mid.set(-n.get()); mid.get() <= n.get(); mid.set(mid.get() + 1)) {
            for (inner.set(-n.get()); inner.get() <= n.get(); inner.set(inner.get() + 1)) {
                total.set(total.get() + inner.get() + mid.get());
            }
        }
    }
    return total;
}
#define TIME(e) TIME_INC(e(1000), dummy)
int main() {
int dummy = 0;
TIME(f_int);
TIME(f_class1)
TIME(f_class2)
TIME(f_method);
TIME(f_macro);
TIME(f<int>);
TIME(f<c>);
TIME(f_get1);
TIME(f_get2);
return dummy;
}