摘要:
似乎没有保证防止重新排序的方法,但是只要未启用链接时/全程序优化,将调用的函数放在单独的编译单元中似乎是一个不错的选择。(至少在GCC中,尽管逻辑上也暗示其他编译器也可能这样做。)这是以函数调用为代价的-内联代码根据定义位于同一编译单元中,并且可以重新排序。
原始答案:
GCC对-O2优化下的通话进行重新排序:
#include <chrono>
static int foo(int x) // 'static' or not here doesn't affect ordering.
{
return x*2;
}
int fred(int x)
{
auto t1 = std::chrono::high_resolution_clock::now();
int y = foo(x);
auto t2 = std::chrono::high_resolution_clock::now();
return y;
}
GCC 5.3.0:
g++ -S --std=c++11 -O0 fred.cpp
:
_ZL3fooi:
pushq %rbp
movq %rsp, %rbp
movl %ecx, 16(%rbp)
movl 16(%rbp), %eax
addl %eax, %eax
popq %rbp
ret
_Z4fredi:
pushq %rbp
movq %rsp, %rbp
subq $64, %rsp
movl %ecx, 16(%rbp)
call _ZNSt6chrono3_V212system_clock3nowEv
movq %rax, -16(%rbp)
movl 16(%rbp), %ecx
call _ZL3fooi
movl %eax, -4(%rbp)
call _ZNSt6chrono3_V212system_clock3nowEv
movq %rax, -32(%rbp)
movl -4(%rbp), %eax
addq $64, %rsp
popq %rbp
ret
但:
g++ -S --std=c++11 -O2 fred.cpp
:
_Z4fredi:
pushq %rbx
subq $32, %rsp
movl %ecx, %ebx
call _ZNSt6chrono3_V212system_clock3nowEv
call _ZNSt6chrono3_V212system_clock3nowEv
leal (%rbx,%rbx), %eax
addq $32, %rsp
popq %rbx
ret
现在,使用foo()作为外部函数:
#include <chrono>
int foo(int x);
int fred(int x)
{
auto t1 = std::chrono::high_resolution_clock::now();
int y = foo(x);
auto t2 = std::chrono::high_resolution_clock::now();
return y;
}
g++ -S --std=c++11 -O2 fred.cpp
:
_Z4fredi:
pushq %rbx
subq $32, %rsp
movl %ecx, %ebx
call _ZNSt6chrono3_V212system_clock3nowEv
movl %ebx, %ecx
call _Z3fooi
movl %eax, %ebx
call _ZNSt6chrono3_V212system_clock3nowEv
movl %ebx, %eax
addq $32, %rsp
popq %rbx
ret
但是,如果与-flto链接(链接时优化),则:
0000000100401710 <main>:
100401710: 53 push %rbx
100401711: 48 83 ec 20 sub $0x20,%rsp
100401715: 89 cb mov %ecx,%ebx
100401717: e8 e4 ff ff ff callq 100401700 <__main>
10040171c: e8 bf f9 ff ff callq 1004010e0 <_ZNSt6chrono3_V212system_clock3nowEv>
100401721: e8 ba f9 ff ff callq 1004010e0 <_ZNSt6chrono3_V212system_clock3nowEv>
100401726: 8d 04 1b lea (%rbx,%rbx,1),%eax
100401729: 48 83 c4 20 add $0x20,%rsp
10040172d: 5b pop %rbx
10040172e: c3 retq