Consider the following code:
#include <iostream.h>
// Minimal value type wrapping a single int. It provides addition and
// assignment so that an expression such as `c = a + b` can be compiled and
// its generated code inspected.
class Scalar
{
public:
    int integer;    // the wrapped value; public so callers can read it directly

    // Wraps i. Deliberately not `explicit`, so existing implicit int -> Scalar
    // conversions keep compiling.
    Scalar(int i) : integer(i) {}

    // Empty user-declared destructor. Kept on purpose: the surrounding text
    // compares the generated code with and without this destructor.
    ~Scalar() {}

    // Returns a new Scalar holding the sum of both operands.
    // Neither operand is modified.
    const Scalar operator+(const Scalar& r) const
    {
        return Scalar(integer + r.integer);
    }

    // Copies r's value into this object.
    // Returns *this so assignments can be chained (x = y = z); callers that
    // ignore the result behave exactly as before.
    Scalar& operator=(const Scalar& r)
    {
        integer = r.integer;
        return *this;
    }
};
// Adds two Scalars, stores the result in a third through operator=, and
// prints the resulting integer followed by a newline.
// argc/argv are not used.
int main(int argc, char* argv[])
{
    // Two operands and a zero-initialised destination.
    Scalar a(5), b(7), c(0);

    // a + b produces a temporary Scalar, which is then assigned to c.
    c = a + b;

    cout << c.integer << "\n";
    return 0;
}
Do you see any reason why the + operator won't be inlined when compiled with /GX, /EHs, or /EHa (exception handling enabled)? Take a look at the disassembly output.
; Listing presented by the post as the compiled main() of the sample above,
; built with exception handling enabled and ~Scalar() declared.
; Stack-slot annotations (a, b, c, temp) follow the constants stored below
; (5, 7, 0) and the arguments handed to the operator+ call.
00401000 sub_401000 proc near ; CODE XREF: start+AF.p
00401000
00401000 var_1C = dword ptr -1Ch ; b (receives 7)
00401000 var_18 = dword ptr -18h ; a (receives 5)
00401000 var_14 = dword ptr -14h ; c (receives 0, later the sum)
00401000 var_10 = byte ptr -10h ; temporary passed to the operator+ call
00401000 var_C = dword ptr -0Ch ; saved previous value of fs:[0]
00401000 var_4 = dword ptr -4 ; slot filled by the first push (-1), updated to 0 and 2 below
00401000
00401000 push 0FFFFFFFFh ; initial value -1 for the var_4 slot
00401002 push offset loc_407CB8 ; address stored in the frame; presumably the exception handler for this function - confirm at loc_407CB8
00401007 mov eax, large fs:0 ; read the current fs:[0] value ...
0040100D push eax ; ... and save it on the stack (var_C)
0040100E mov large fs:0, esp ; fs:[0] now points at the record just pushed (saved fs:[0] + loc_407CB8)
00401015 sub esp, 10h ; reserve 16 bytes: var_10, var_14, var_18, var_1C
00401018 mov [esp+1Ch+var_18], 5 ; a.integer = 5
00401020 xor eax, eax ; eax = 0, reused for the next two stores
00401022 mov [esp+1Ch+var_1C], 7 ; b.integer = 7
0040102A mov [esp+1Ch+var_4], eax ; var_4 = 0; NOTE(review): presumably the compiler's EH state index rather than an object count - confirm
0040102E mov [esp+1Ch+var_14], eax ; c.integer = 0
00401032 lea eax, [esp+1Ch+var_1C] ; eax = address of b
00401036 lea ecx, [esp+1Ch+var_10] ; ecx = address of the temporary that will receive a+b
0040103A push eax ; argument: address of b
0040103B push ecx ; argument: address of the temporary
0040103C lea ecx, [esp+24h+var_18] ; ecx = address of a, the object operator+ is invoked on (offset is 24h because of the two pushes)
00401040 mov byte ptr [esp+24h+var_4], 2 ; low byte of var_4 = 2 before the call (see note on var_4 above)
00401045 call sub_401090 ; operator+ as a real call, i.e. not inlined
0040104A mov eax, [eax] ; eax is used as a pointer to the result; load its integer member (offset 0)
0040104C push offset unk_409040 ; argument for the later operator<<(char const *) call; presumably the "\n" literal
00401051 push eax ; argument for operator<<(int): the sum
00401052 mov ecx, offset dword_40B9C8 ; object for the first operator<< call; presumably cout
00401057 mov [esp+24h+var_14], eax ; c.integer = sum (the assignment, done inline)
0040105B call ??6ostream@@QAEAAV0@H@Z ; ostream::operator<<(int)
00401060 mov ecx, eax ; the returned stream becomes the object of the next call
00401062 call ??6ostream@@QAEAAV0@PBD@Z ; ostream::operator<<(char const *)
00401067 mov ecx, [esp+1Ch+var_C] ; reload the fs:[0] value saved on entry
0040106B xor eax, eax ; return value 0
0040106D mov large fs:0, ecx ; restore the previous fs:[0]
00401074 add esp, 1Ch ; release the 10h bytes of locals plus the three pushed dwords (0Ch)
00401077 retn
00401077 sub_401000 endp
As you can see, the compiler makes a function call and produces stack-unwinding code. This is not so good when writing, for example, an optimized vector class.
Now let's remove the destructor and recompile:
; Listing presented by the post as the same main() after removing ~Scalar().
; No call to operator+, no fs:[0] manipulation and no stack locals appear:
; only the two operator<< calls remain.
00401000 sub_401000 proc near ; CODE XREF: start+AF.p
00401000 push offset unk_409040 ; argument for the later operator<<(char const *) call; presumably the "\n" literal
00401005 push 0Ch ; argument for operator<<(int): 5+7 already folded to the constant 0Ch
00401007 mov ecx, offset dword_40B9C8 ; object for the first operator<< call; presumably cout
0040100C call ??6ostream@@QAEAAV0@H@Z ; ostream::operator<<(int)
00401011 mov ecx, eax ; the returned stream becomes the object of the next call
00401013 call ??6ostream@@QAEAAV0@PBD@Z ; ostream::operator<<(char const *)
00401018 xor eax, eax ; return value 0
0040101A retn
0040101A sub_401000 endp
Now that is more like the code I want to see. Also take a look at the topic "C++ Operator Overloading, and why it blows chunks" to see the actual differences in speed.
Well, MSDN states that a function won't be inlined when "the function returns an unwindable object by value, when compiled with /GX, /EHs, or /EHa." Now, what is an unwindable object? Looking deeper in MSDN, you can read that when an exception is thrown, the destructors for all automatically created objects are called. So an unwindable object must be an object with a destructor.
I am not an expert on how the compiler generates exception-handling code, but I can't see any reason why an inline function would differ from a normal one. Does anyone else have an idea?