Phlip said:
The topic we have all been avoiding:
Is it possible, somewhere, somehow, that throwing the exception could
actually be faster than returning?
Yes. Here is a benchmark program.
First, the CPU and compiler information. Linux gives us this, so I will
just paste. This box has four of these processors, by the way:
processor : 0
vendor_id : GenuineIntel
cpu family : 15
model : 4
model name : Intel(R) Xeon(TM) CPU 2.80GHz
stepping : 1
cpu MHz : 2793.087
cache size : 1024 KB
physical id : 0
siblings : 2
runqueue : 0
fdiv_bug : no
hlt_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 5
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm lm
bogomips : 5570.56
Next, the compiler, GNU C++:
g++ (GCC) 3.2.3 20030502 (Red Hat Linux 3.2.3-49)
I just used this with -O2 to make an a.out.
The program:
#include <cstdio>
#include <ctime>
using std:
rintf;
using std::clock_t;
using std::clock;
const int calibrate_iterations = 1000000;
const int return_test_iterations = 100000;
const int exception_test_iterations = 100000;
const int num_call_depths = 10;
const int starting_call_depth = 100;
const int call_depth_increment = 100;
static clock_t start;
static long sample_sum;
static long sample_count;
static void reset_chrono()
{
start = sample_sum = sample_count = 0;
}
static void start_chrono()
{
start = clock();
}
static void stop_chrono()
{
clock_t stop = clock();
clock_t diff = stop - start;
sample_sum += diff;
sample_count++;
}
double get_average_time()
{
return ((double) sample_sum) / sample_count / CLOCKS_PER_SEC;
}
/*
* Regular return.
*/
bool rt_top(int, int = 0);
bool rt_middle(int, int);
bool rt_bottom(int, int);
bool rt_test(int max_depth)
{
return rt_top(max_depth);
}
bool rt_top(int max_depth, int depth)
{
return rt_middle(max_depth, depth + 1);
}
bool rt_middle(int max_depth, int depth)
{
return rt_bottom(max_depth, depth + 1);
}
bool rt_bottom(int max_depth, int depth)
{
if (depth >= max_depth)
return false;
return rt_top(max_depth, depth + 1);
}
/*
* Exception-based return
*/
bool ex_top(int, int = 0);
bool ex_middle(int, int);
bool ex_bottom(int, int);
bool ex_test(int max_depth)
try {
return ex_top(max_depth);
} catch (...) {
return false;
}
bool ex_top(int max_depth, int depth)
{
ex_middle(max_depth, depth + 1);
}
bool ex_middle(int max_depth, int depth)
{
ex_bottom(max_depth, depth + 1);
}
bool ex_bottom(int max_depth, int depth)
{
if (depth >= max_depth)
throw false;
ex_top(max_depth, depth + 1);
}
int main()
{
/*
* First, calibrate test.
*/
reset_chrono();
for (int i = 0; i < calibrate_iterations; i++) {
start_chrono();
stop_chrono();
}
double chrono_bias = get_average_time();
printf("chrono_bias = %12.9fs\n", chrono_bias);
/*
* Now do return speed test.
*/
for (int depth = 0; depth < num_call_depths; depth++) {
int max_depth = starting_call_depth + depth *
call_depth_increment;
reset_chrono();
for (int i = 0; i < return_test_iterations; i++) {
start_chrono();
rt_test(max_depth);
stop_chrono();
}
double return_time = get_average_time() - chrono_bias;
reset_chrono();
for (int i = 0; i < exception_test_iterations; i++) {
start_chrono();
ex_test(max_depth);
stop_chrono();
}
double exception_time = get_average_time() - chrono_bias;
printf("\n");
printf("max_depth = %d\n", max_depth);
printf("return_time = %12.9fs\n", return_time);
printf("exception_time = %12.9fs\n", exception_time);
}
return 0;
}
The results:
chrono_bias = 0.000000400s
max_depth = 100
return_time = 0.000002000s
exception_time = 0.000009000s
max_depth = 200
return_time = 0.000003000s
exception_time = 0.000009400s
max_depth = 300
return_time = 0.000004900s
exception_time = 0.000009900s
max_depth = 400
return_time = 0.000006300s
exception_time = 0.000010000s
max_depth = 500
return_time = 0.000007700s
exception_time = 0.000010800s
max_depth = 600
return_time = 0.000009400s
exception_time = 0.000010500s
max_depth = 700
return_time = 0.000010900s
exception_time = 0.000011300s
max_depth = 800
return_time = 0.000012800s
exception_time = 0.000011100s
max_depth = 900
return_time = 0.000014200s
exception_time = 0.000011900s
max_depth = 1000
return_time = 0.000016400s
exception_time = 0.000012100s
As you can see, as the stack grows deeper, the exception mechanism
eventually becomes faster. The break-even point is somewhere at a depth
of about 700 to 800.
The results suggest that that exceptions have a high fixed cost on this
compiler, but a marginal additional cost to jump deeper.
In one experimental run, I moved the start_chrono() call for the
exception test to within the try { } region, and the results weren't
significantly affected. So there isn't much cost to setting up the
catch. The large, fixed overhead is somewhere in the processing of the
exception.