zl2k said:
hi, there,
---------code 1-------------
for (vector<int>::interator it = somevector.begin(); it !=
somevector.end(); it++){...}
---------code 2-------------
vector<int>::iterator stop = somevector.end();
for (vector<int>::interator it = somevector.begin(); it != stop; it++)
{...}
Question:
If the somevector's length does not change, is code2 a better
implementation than code1 (since it saves the end() call for each
loop)?
For const_iterator, does stl already do the optimization so that code1
and code2 won't have much difference?
Thanks a lot.
Here is what g++ (version 4.4.3, optimization level -O2) does.
input:
======
#include <vector>
typedef std::vector< int > int_vector;
int sum_a1 ( int_vector const & arg ) {
int result = 0;
for ( int_vector::const_iterator iter = arg.begin();
iter != arg.end(); ++iter ) {
result += *iter;
}
return ( result );
}
int sum_a2 ( int_vector const & arg ) {
int result = 0;
for ( int_vector::const_iterator iter = arg.begin();
iter != arg.end(); iter++ ) {
result += *iter;
}
return ( result );
}
int sum_a3 ( int_vector const & arg ) {
int result = 0;
int_vector::const_iterator past_end = arg.end();
for ( int_vector::const_iterator iter = arg.begin();
iter != past_end; ++iter ) {
result += *iter;
}
return ( result );
}
int sum_b1 ( int_vector & arg ) {
int result = 0;
for ( int_vector::iterator iter = arg.begin();
iter != arg.end(); ++iter ) {
result += *iter;
}
return ( result );
}
int sum_b2 ( int_vector & arg ) {
int result = 0;
for ( int_vector::iterator iter = arg.begin();
iter != arg.end(); iter++ ) {
result += *iter;
}
return ( result );
}
int sum_b3 ( int_vector & arg ) {
int result = 0;
int_vector::iterator past_end = arg.end();
for ( int_vector::iterator iter = arg.begin();
iter != past_end; ++iter ) {
result += *iter;
}
return ( result );
}
output of g++ -O2 -S:
=====================
.file "optimization_001.cc"
.text
.p2align 4,,15
..globl _Z6sum_a1RKSt6vectorIiSaIiEE
.type _Z6sum_a1RKSt6vectorIiSaIiEE, @function
_Z6sum_a1RKSt6vectorIiSaIiEE:
..LFB435:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
movl 8(%ebp), %eax
movl (%eax), %edx
movl 4(%eax), %ecx
xorl %eax, %eax
cmpl %ecx, %edx
je .L3
.p2align 4,,7
.p2align 3
..L6:
addl (%edx), %eax
addl $4, %edx
cmpl %edx, %ecx
jne .L6
..L3:
popl %ebp
ret
.cfi_endproc
..LFE435:
.size _Z6sum_a1RKSt6vectorIiSaIiEE, .-_Z6sum_a1RKSt6vectorIiSaIiEE
.p2align 4,,15
..globl _Z6sum_a2RKSt6vectorIiSaIiEE
.type _Z6sum_a2RKSt6vectorIiSaIiEE, @function
_Z6sum_a2RKSt6vectorIiSaIiEE:
..LFB436:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
movl 8(%ebp), %eax
movl (%eax), %edx
movl 4(%eax), %ecx
xorl %eax, %eax
cmpl %ecx, %edx
je .L11
.p2align 4,,7
.p2align 3
..L14:
addl (%edx), %eax
addl $4, %edx
cmpl %edx, %ecx
jne .L14
..L11:
popl %ebp
ret
.cfi_endproc
..LFE436:
.size _Z6sum_a2RKSt6vectorIiSaIiEE, .-_Z6sum_a2RKSt6vectorIiSaIiEE
.p2align 4,,15
..globl _Z6sum_a3RKSt6vectorIiSaIiEE
.type _Z6sum_a3RKSt6vectorIiSaIiEE, @function
_Z6sum_a3RKSt6vectorIiSaIiEE:
..LFB437:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
movl 8(%ebp), %eax
movl 4(%eax), %ecx
movl (%eax), %edx
xorl %eax, %eax
cmpl %edx, %ecx
je .L18
.p2align 4,,7
.p2align 3
..L21:
addl (%edx), %eax
addl $4, %edx
cmpl %edx, %ecx
jne .L21
..L18:
popl %ebp
ret
.cfi_endproc
..LFE437:
.size _Z6sum_a3RKSt6vectorIiSaIiEE, .-_Z6sum_a3RKSt6vectorIiSaIiEE
.p2align 4,,15
..globl _Z6sum_b1RSt6vectorIiSaIiEE
.type _Z6sum_b1RSt6vectorIiSaIiEE, @function
_Z6sum_b1RSt6vectorIiSaIiEE:
..LFB438:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
movl 8(%ebp), %eax
movl (%eax), %edx
movl 4(%eax), %ecx
xorl %eax, %eax
cmpl %ecx, %edx
je .L25
.p2align 4,,7
.p2align 3
..L28:
addl (%edx), %eax
addl $4, %edx
cmpl %edx, %ecx
jne .L28
..L25:
popl %ebp
ret
.cfi_endproc
..LFE438:
.size _Z6sum_b1RSt6vectorIiSaIiEE, .-_Z6sum_b1RSt6vectorIiSaIiEE
.p2align 4,,15
..globl _Z6sum_b2RSt6vectorIiSaIiEE
.type _Z6sum_b2RSt6vectorIiSaIiEE, @function
_Z6sum_b2RSt6vectorIiSaIiEE:
..LFB439:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
movl 8(%ebp), %eax
movl (%eax), %edx
movl 4(%eax), %ecx
xorl %eax, %eax
cmpl %ecx, %edx
je .L32
.p2align 4,,7
.p2align 3
..L35:
addl (%edx), %eax
addl $4, %edx
cmpl %edx, %ecx
jne .L35
..L32:
popl %ebp
ret
.cfi_endproc
..LFE439:
.size _Z6sum_b2RSt6vectorIiSaIiEE, .-_Z6sum_b2RSt6vectorIiSaIiEE
.p2align 4,,15
..globl _Z6sum_b3RSt6vectorIiSaIiEE
.type _Z6sum_b3RSt6vectorIiSaIiEE, @function
_Z6sum_b3RSt6vectorIiSaIiEE:
..LFB440:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
movl 8(%ebp), %eax
movl 4(%eax), %ecx
movl (%eax), %edx
xorl %eax, %eax
cmpl %edx, %ecx
je .L39
.p2align 4,,7
.p2align 3
..L42:
addl (%edx), %eax
addl $4, %edx
cmpl %edx, %ecx
jne .L42
..L39:
popl %ebp
ret
.cfi_endproc
..LFE440:
.size _Z6sum_b3RSt6vectorIiSaIiEE, .-_Z6sum_b3RSt6vectorIiSaIiEE
.ident "GCC: (Gentoo 4.4.3-r2 p1.2) 4.4.3"
.section .note.GNU-stack,"",@progbits
The versions sum_x1 and sum_x2 (x=a,b) are compiled into identical code. The
difference of whether to manually store the termination value of the loop
after computing it once or leaving the computation inside the loop is _gone_
after the compiler is done. I expect other optimizing compilers to reproduce
this observation: the technique (moving code outside a loop when possible)
used in this optimization have been known for a long time. So, let the
compiler worry about micro-optimizations such as these and just choose the
version that fits best the coding style used in the project unless profiling
shows that there is a real bottleneck.
Best
Kai-Uwe Bux