Valid HTML 4.01 Transitional Valid CSS Valid SVG 1.0

Me, myself & IT

Optimizing Microsoft® Visual C compilers

Purpose

Document poor code generation of Microsoft’s optimizing Visual C compilers.

Example 0

According to their documentation on MSDN, the macros Int32x32To64 and UInt32x32To64 defined in the header file WINNT.H of the Windows® SDK generate just a single multiply instruction:
Multiplies two signed 32-bit integers, returning a signed 64-bit integer result. The function performs optimally on 32-bit Windows.

This function is implemented on all platforms by optimal inline code: a single multiply instruction that returns a 64-bit result.
Multiplies two unsigned 32-bit integers, returning an unsigned 64-bit integer result. The function performs optimally on 32-bit Windows.

This function is implemented on all platforms by optimal inline code: a single multiply instruction that returns a 64-bit result.
Contrary to this, the 32-bit Visual C compilers generate calls to the external routine _allmul() instead of a single multiply instruction!

Note: _allmul() is an undocumented helper routine for the 32-bit compiler which multiplies two 64-bit integers, similar to the (documented) _alldiv() and _aulldiv() helper routines.

Demonstration

  1. Create the text file example0.c with the following content in an arbitrary, preferably empty directory:

    // Copyright © 2004-2018, Stefan Kanthak <‍stefan‍.‍kanthak‍@‍nexgo‍.‍de‍>
    
    #define Int32x32To64(a, b)  ((__int64)(((__int64)((long)(a))) * ((long)(b))))
    #define UInt32x32To64(a, b) ((unsigned __int64)(((unsigned __int64)((unsigned int)(a))) * ((unsigned int)(b))))
    
    int main(int argc)                              // demo: three ways to form the product argc * -argc
    {
        __int64 x = argc * -argc;                   // plain int multiplication; the result is widened afterwards
        __int64 y = Int32x32To64(argc, -argc);      // macro casts the first operand to __int64 before multiplying
        __int64 z = UInt32x32To64(argc, -argc);     // macro casts the first operand to unsigned __int64 before multiplying
    }
    
  2. Generate the assembly listing example0.asm and the object file example0.obj from the source file example0.c created in step 1., using the Visual C 2015 compiler for the x86 processor architecture:

    CL.EXE /Bv /c /Fa /Tcexample0.c
    Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 19.13.26129.01 for 80x86
    Copyright (C) Microsoft Corporation.  All rights reserved.
    
    Compiler Passes:
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\cl.exe:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\c1.dll:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\c1xx.dll:      Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\c2.dll:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\link.exe:      Version 14.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\mspdb140.dll:  Version 14.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\1033\clui.dll: Version 19.13.26129.0
    
    example0.c
    
  3. Display the assembly listing example0.asm created in step 2.:

    Type example0.asm
    ; Listing generated by Microsoft (R) Optimizing Compiler Version 19.13.26129.0 
    
    	TITLE	C:\Users\Stefan\Desktop\example0.c
    	.686P
    	.XMM
    	include	listing.inc
    	.model	flat
    
    INCLUDELIB LIBCMT
    INCLUDELIB OLDNAMES
    
    PUBLIC	_main
    EXTRN	__allmul:PROC
    
    ; Function compile flags: /Odtp
    _TEXT	SEGMENT
    ; _main (x86, compile flags /Odtp): stores x, y and z of example0.c into
    ; three 8-byte stack slots and returns 0 in EAX.
    ; The symbols below are the EBP-relative offsets of the locals and of argc.
    _z$ = -24						; size = 8
    _y$ = -16						; size = 8
    _x$ = -8						; size = 8
    _argc$ = 8						; size = 4
    _main	PROC
    ; File c:\users\stefan\desktop\example0.c
    ; Line 7
    	push	ebp
    	mov	ebp, esp
    	sub	esp, 24					; 00000018H
    	push	esi
    ; Line 8
    ; x: 32-bit IMUL of -argc and argc; CDQ sign-extends the product into EDX:EAX afterwards.
    	mov	eax, DWORD PTR _argc$[ebp]
    	neg	eax
    	imul	eax, DWORD PTR _argc$[ebp]
    	cdq
    	mov	DWORD PTR _x$[ebp], eax
    	mov	DWORD PTR _x$[ebp+4], edx
    ; Line 9
    ; y: both operands are sign-extended to 64 bits with CDQ and pushed as
    ; arguments for the external 64-bit multiply helper __allmul.
    	mov	eax, DWORD PTR _argc$[ebp]
    	cdq
    	mov	ecx, eax
    	mov	esi, edx
    	mov	eax, DWORD PTR _argc$[ebp]
    	neg	eax
    	cdq
    	push	edx
    	push	eax
    	push	esi
    	push	ecx
    	call	__allmul
    ; NOTE(review): the next three instructions recompute EDX:EAX with a single
    ; IMUL and thereby overwrite the result of __allmul. They are presumably the
    ; author's suggested replacement for the call sequence above, whose
    ; deletion/insertion markup was lost in this text rendering -- confirm
    ; against the original page.
    	mov	edx, DWORD PTR _argc$[ebp]
    	neg	edx
    	imul	edx
    	mov	DWORD PTR _y$[ebp], eax
    	mov	DWORD PTR _y$[ebp+4], edx
    ; Line 10
    ; z: a single unsigned MUL leaves the 64-bit product in EDX:EAX.
    	mov	edx, DWORD PTR _argc$[ebp]
    	neg	edx
    	mov	eax, DWORD PTR _argc$[ebp]
    	mul	edx
    	mov	DWORD PTR _z$[ebp], eax
    	mov	DWORD PTR _z$[ebp+4], edx
    ; Line 11
    	xor	eax, eax
    	pop	esi
    ; NOTE(review): LEAVE performs the same work as MOV ESP, EBP followed by
    ; POP EBP, so all three in a row would pop EBP twice. Presumably LEAVE is the
    ; suggested replacement for the two preceding instructions (markup lost) --
    ; confirm against the original page.
    	mov	esp, ebp
    	pop	ebp
    	leave
    	ret	0
    _main	ENDP
    _TEXT	ENDS
    END
    
    Notice the difference between the signed and the unsigned multiplication: while a single multiply instruction is generated for the latter, a call of the external routine _allmul() is generated for the former!

Fix

Both macros should have been replaced a long time ago by the intrinsic functions __emul() and __emulu() introduced with the Visual C 2005 compiler!
/* Compilers with _MSC_VER below 1400: keep the cast-and-multiply macros. */
#if _MSC_VER < 1400
#define Int32x32To64(a, b)  ((__int64)(((__int64)((long)(a))) * ((long)(b))))
#define UInt32x32To64(a, b) ((unsigned __int64)(((unsigned __int64)((unsigned int)(a))) * ((unsigned int)(b))))
#else
/* Otherwise declare the two multiply intrinsics, request their intrinsic form
   and let the macro names expand directly to them. */
         __int64 __emul(int, int);
unsigned __int64 __emulu(unsigned int, unsigned int);
#pragma intrinsic(__emul, __emulu)
#define Int32x32To64  __emul
#define UInt32x32To64 __emulu
#endif
Note: of course this also applies to the macros (really: inline assembler functions) Int64ShllMod32(), Int64ShraMod32() and Int64ShrlMod32() defined in the header file WINNT.H of the Windows SDK; these too should have been replaced a long time ago by the intrinsic functions __ll_lshift(), __ll_rshift() and __ull_rshift() introduced with the Visual C 2005 compiler!
/* Compilers with _MSC_VER below 1400: the existing definitions (elided here) stay in place. */
#if _MSC_VER < 1400
…
#else
/* Otherwise declare the three 64-bit shift intrinsics, request their intrinsic
   form and let the macro names expand directly to them. */
unsigned __int64 __ll_lshift(unsigned __int64, int);
         __int64 __ll_rshift(__int64, int);
unsigned __int64 __ull_rshift(unsigned __int64, int);
#pragma intrinsic(__ll_lshift, __ll_rshift, __ull_rshift)
#define Int64ShllMod32	__ll_lshift
#define Int64ShraMod32	__ll_rshift
#define Int64ShrlMod32	__ull_rshift
#endif

Example 1

Superfluous unreachable call of external routine __report_rangecheckfailure() generated by Visual C 2015 compiler.

Demonstration

  1. Create the text file example1.c with the following content in an arbitrary, preferably empty directory:

    // Copyright © 2018, Stefan Kanthak <‍stefan‍.‍kanthak‍@‍nexgo‍.‍de‍>
    
    #define MAX_PATH 260
    
    typedef short wchar_t;
    
    unsigned __stdcall GetModuleFileNameA(void *, char *, unsigned);
    
    int main()                                      // single-byte variant: query the module file name into a stack buffer
    {
        char sz[MAX_PATH];
        unsigned dw = GetModuleFileNameA(0, sz, MAX_PATH);
    
        if (dw < MAX_PATH)                          // this guard already keeps the index below within sz[]
            sz[dw] = '\0';
    }
    
    unsigned __stdcall GetModuleFileNameW(void *, wchar_t *, unsigned);
    
    int wmain()                                     // wide-character variant of main() above
    {
        wchar_t sz[MAX_PATH];
        unsigned dw = GetModuleFileNameW(0, sz, MAX_PATH);
    
        if (dw < MAX_PATH)                          // this guard already keeps the index below within sz[]
            sz[dw] = L'\0';
    }
    
  2. Generate the assembly listing example1.asm and the object file example1.obj from the source file example1.c created in step 1., using the Visual C 2015 compiler for the x86 processor architecture:

    CL.EXE /Bv /c /Fa /O1s /Tcexample1.c /W4 /Zl
    Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 19.13.26129.01 for 80x86
    Copyright (C) Microsoft Corporation.  All rights reserved.
    
    Compiler Passes:
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\cl.exe:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\c1.dll:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\c1xx.dll:      Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\c2.dll:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\link.exe:      Version 14.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\mspdb140.dll:  Version 14.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\1033\clui.dll: Version 19.13.26129.0
    
    example1.c
    
  3. Display the assembly listing example1.asm created in step 2.:

    Type example1.asm
    ; Listing generated by Microsoft (R) Optimizing Compiler Version 19.13.26129.0 
    
    	TITLE	C:\Users\Stefan\Desktop\example1.c
    	.686P
    	.XMM
    	include	listing.inc
    	.model	flat
    
    INCLUDELIB LIBCMT
    INCLUDELIB OLDNAMES
    
    PUBLIC	_main
    PUBLIC	_wmain
    EXTRN	___report_rangecheckfailure:PROC
    EXTRN	_GetModuleFileNameA@12:PROC
    EXTRN	_GetModuleFileNameW@12:PROC
    EXTRN	@__security_check_cookie@4:PROC
    EXTRN	___security_cookie:DWORD
    
    ; Function compile flags: /Ogspy
    ;	COMDAT	_main
    _TEXT	SEGMENT
    ; _main (x86, compile flags /Ogspy): calls GetModuleFileNameA with a 260-byte
    ; stack buffer, stores a NUL byte at sz[length] when length is below 260 and
    ; returns 0 in EAX. A security cookie is kept in the slot __$ArrayPad$.
    _sz$ = -264						; size = 260
    __$ArrayPad$ = -4					; size = 4
    _main	PROC						; COMDAT
    ; File c:\users\stefan\desktop\example1.c
    ; Line 10
    	push	ebp
    	mov	ebp, esp
    	sub	esp, 264				; 00000108H
    ; store (security cookie XOR EBP) into the frame; it is checked again before returning
    	mov	eax, DWORD PTR ___security_cookie
    	xor	eax, ebp
    	mov	DWORD PTR __$ArrayPad$[ebp], eax
    	push	esi
    ; Line 12
    ; ESI holds the constant 260 for both the call argument and the comparison below
    	mov	esi, 260				; 00000104H
    	lea	eax, DWORD PTR _sz$[ebp]
    	push	esi
    	push	eax
    	push	0
    	call	_GetModuleFileNameA@12
    ; Line 14
    ; unsigned comparison of the returned length with 260; POP does not change the flags
    	cmp	eax, esi
    	pop	esi
    	jae	SHORT $LN2@main
    ; Line 15
    ; the conditional store of the terminating NUL that the article refers to
    	mov	BYTE PTR _sz$[ebp+eax], 0
    $LN2@main:
    ; Line 16
    	mov	ecx, DWORD PTR __$ArrayPad$[ebp]
    	xor	eax, eax
    	xor	ecx, ebp
    	call	@__security_check_cookie@4
    ; NOTE(review): LEAVE performs the same work as MOV ESP, EBP followed by
    ; POP EBP, so all three in a row would pop EBP twice. Presumably LEAVE is the
    ; suggested replacement for the two preceding instructions (markup lost in
    ; this text rendering) -- confirm against the original page.
    	mov	esp, ebp
    	pop	ebp
    	leave
    	ret	0
    _main	ENDP
    _TEXT	ENDS
    
    ; Function compile flags: /Ogspy
    ;	COMDAT	_wmain
    _TEXT	SEGMENT
    ; _wmain (x86, compile flags /Ogspy): calls GetModuleFileNameW with a
    ; 520-byte stack buffer and returns 0 in EAX. No store of the terminating NUL
    ; appears in this listing; only a range check of the byte offset remains.
    _sz$ = -524						; size = 520
    __$ArrayPad$ = -4					; size = 4
    _wmain	PROC						; COMDAT
    ; File c:\users\stefan\desktop\example1.c
    ; Line 21
    	push	ebp
    	mov	ebp, esp
    	sub	esp, 524				; 0000020cH
    ; store (security cookie XOR EBP) into the frame; it is checked again before returning
    	mov	eax, DWORD PTR ___security_cookie
    	xor	eax, ebp
    	mov	DWORD PTR __$ArrayPad$[ebp], eax
    	push	esi
    ; Line 23
    ; ESI holds the constant 260 for both the call argument and the comparison below
    	mov	esi, 260				; 00000104H
    	lea	eax, DWORD PTR _sz$[ebp]
    	push	esi
    	push	eax
    	push	0
    	call	_GetModuleFileNameW@12
    ; Line 25
    ; unsigned comparison of the returned length with 260; POP does not change the flags
    	cmp	eax, esi
    	pop	esi
    	jae	SHORT $LN2@wmain
    ; Line 26
    ; Range check of the byte offset 2*dw against the buffer size 520. This point
    ; is reached only with EAX below 260, so the doubled value is below 520 and
    ; the JAE cannot be taken.
    	add	eax, eax
    	cmp	eax, 520				; 00000208H
    	jae	SHORT $LN9@wmain
    $LN2@wmain:
    ; Line 27
    	mov	ecx, DWORD PTR __$ArrayPad$[ebp]
    	xor	eax, eax
    	xor	ecx, ebp
    	call	@__security_check_cookie@4
    ; NOTE(review): LEAVE performs the same work as MOV ESP, EBP followed by
    ; POP EBP, so all three in a row would pop EBP twice. Presumably LEAVE is the
    ; suggested replacement for the two preceding instructions (markup lost in
    ; this text rendering) -- confirm against the original page.
    	mov	esp, ebp
    	pop	ebp
    	leave
    	ret	0
    $LN9@wmain:
    ; Line 26
    ; target of the never-taken branch above: reports the range check failure
    	call	___report_rangecheckfailure
    $LN11@wmain:
    $LN8@wmain:
    	int	3
    _wmain	ENDP
    _TEXT	ENDS
    END
    
    Notice the difference between the single-byte character routine main() and the double-byte character routine wmain(): in the former, the conditional assignment of the terminating NUL character is not removed; in the latter, a superfluous range check with a conditional branch that can never be taken is inserted instead, plus an unreachable call of the external routine __report_rangecheckfailure()!
  4. Generate the assembly listing example1.asm and the object file example1.obj from the source file example1.c created in step 1., using the Visual C 2015 compiler for the x64 processor architecture:

    CL.EXE /Bv /c /Fa /O1s /Tcexample1.c /W4 /Zl
    Microsoft (R) C/C++ Optimizing Compiler Version 19.13.26129.01 for x64
    Copyright (C) Microsoft Corporation.  All rights reserved.
    
    Compiler Passes:
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx64\x64\cl.exe:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx64\x64\c1.dll:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx64\x64\c1xx.dll:      Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx64\x64\c2.dll:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx64\x64\link.exe:      Version 14.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx64\x64\mspdb140.dll:  Version 14.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx64\x64\1033\clui.dll: Version 19.13.26129.0
    
    example1.c
    
  5. Display the assembly listing example1.asm created in step 4.:

    Type example1.asm
    ; Listing generated by Microsoft (R) Optimizing Compiler Version 19.13.26129.0 
    
    include	listing.inc
    
    INCLUDELIB LIBCMT
    INCLUDELIB OLDNAMES
    
    PUBLIC	main
    PUBLIC	wmain
    EXTRN	__report_rangecheckfailure:PROC
    EXTRN	GetModuleFileNameA:PROC
    EXTRN	GetModuleFileNameW:PROC
    EXTRN	__GSHandlerCheck:PROC
    EXTRN	__security_check_cookie:PROC
    EXTRN	__security_cookie:QWORD
    ;	COMDAT	pdata
    pdata	SEGMENT
    $pdata$main DD	imagerel $LN10
    	DD	imagerel $LN10+97
    	DD	imagerel $unwind$main
    pdata	ENDS
    ;	COMDAT	pdata
    pdata	SEGMENT
    $pdata$wmain DD imagerel $LN11
    	DD	imagerel $LN11+95
    	DD	imagerel $unwind$wmain
    pdata	ENDS
    ;	COMDAT	xdata
    xdata	SEGMENT
    $unwind$wmain DD 021919H
    	DD	0490107H
    	DD	imagerel __GSHandlerCheck
    	DD	0230H
    xdata	ENDS
    ;	COMDAT	xdata
    xdata	SEGMENT
    $unwind$main DD 021919H
    	DD	0290107H
    	DD	imagerel __GSHandlerCheck
    	DD	0130H
    xdata	ENDS
    
    ; Function compile flags: /Ogspy
    ;	COMDAT	main
    _TEXT	SEGMENT
    ; main (x64, compile flags /Ogspy): calls GetModuleFileNameA with a stack
    ; buffer, stores a NUL byte at sz[length] when length is below 260 and
    ; returns 0 in EAX. The symbols below are RSP-relative offsets.
    sz$ = 32
    __$ArrayPad$ = 304
    main	PROC						; COMDAT
    ; File c:\users\stefan\desktop\example1.c
    ; Line 10
    $LN10:
    	sub	rsp, 328				; 00000148H
    ; store (security cookie XOR RSP) into the frame; it is checked again before returning
    	mov	rax, QWORD PTR __security_cookie
    	xor	rax, rsp
    	mov	QWORD PTR __$ArrayPad$[rsp], rax
    ; Line 12
    ; arguments: RCX = 0, RDX = address of sz, R8D = 260
    	mov	r8d, 260				; 00000104H
    	lea	rdx, QWORD PTR sz$[rsp]
    	xor	ecx, ecx
    	call	GetModuleFileNameA
    ; Line 14
    	cmp	eax, 260				; 00000104H
    	jae	SHORT $LN2@main
    ; Line 15
    ; MOV EAX, EAX zero-extends the length into RAX. The second comparison with
    ; 260 repeats the test made for line 14, so this JAE cannot be taken.
    	mov	eax, eax
    	cmp	rax, 260				; 00000104H
    	jae	SHORT $LN9@main
    	mov	BYTE PTR sz$[rsp+rax], 0
    $LN2@main:
    ; Line 16
    	xor	eax, eax
    	mov	rcx, QWORD PTR __$ArrayPad$[rsp]
    	xor	rcx, rsp
    	call	__security_check_cookie
    	add	rsp, 328				; 00000148H
    	ret	0
    $LN9@main:
    ; Line 15
    ; target of the never-taken branch above: reports the range check failure
    	call	__report_rangecheckfailure
    	int	3
    $LN8@main:
    main	ENDP
    _TEXT	ENDS
    
    ; Function compile flags: /Ogspy
    ;	COMDAT	wmain
    _TEXT	SEGMENT
    ; wmain (x64, compile flags /Ogspy): calls GetModuleFileNameW with a stack
    ; buffer and returns 0 in EAX. No store of the terminating NUL appears in
    ; this listing; only a range check of the byte offset remains.
    ; The symbols below are RSP-relative offsets.
    sz$ = 32
    __$ArrayPad$ = 560
    wmain	PROC						; COMDAT
    ; File c:\users\stefan\desktop\example1.c
    ; Line 21
    $LN11:
    	sub	rsp, 584				; 00000248H
    ; store (security cookie XOR RSP) into the frame; it is checked again before returning
    	mov	rax, QWORD PTR __security_cookie
    	xor	rax, rsp
    	mov	QWORD PTR __$ArrayPad$[rsp], rax
    ; Line 23
    ; arguments: RCX = 0, RDX = address of sz, R8D = 260
    	mov	r8d, 260				; 00000104H
    	lea	rdx, QWORD PTR sz$[rsp]
    	xor	ecx, ecx
    	call	GetModuleFileNameW
    ; Line 25
    	cmp	eax, 260				; 00000104H
    	jae	SHORT $LN2@wmain
    ; Line 26
    ; Range check of the byte offset 2*dw against the buffer size 520. This point
    ; is reached only with EAX below 260, so the doubled value is below 520 and
    ; the JAE cannot be taken.
    	mov	eax, eax
    	add	rax, rax
    	cmp	rax, 520				; 00000208H
    	jae	SHORT $LN9@wmain
    $LN2@wmain:
    ; Line 27
    	xor	eax, eax
    	mov	rcx, QWORD PTR __$ArrayPad$[rsp]
    	xor	rcx, rsp
    	call	__security_check_cookie
    	add	rsp, 584				; 00000248H
    	ret	0
    $LN9@wmain:
    ; Line 26
    ; target of the never-taken branch above: reports the range check failure
    	call	__report_rangecheckfailure
    	int	3
    $LN8@wmain:
    wmain	ENDP
    _TEXT	ENDS
    END
    
    Notice the superfluous range checks with conditional branches that can never be taken, plus the unreachable calls of the external routine __report_rangecheckfailure()!
    Also notice that the conditional assignment of the terminating NUL character is not removed in the single-byte character routine main().

Example 2

Superfluous load and store operations using superfluous temporary variable generated by Visual C 2015 and Visual C 2010 compilers.

Demonstration

  1. Create the text file example2.c with the following content in an arbitrary, preferably empty directory:

    // Copyright © 2018, Stefan Kanthak <‍stefan‍.‍kanthak‍@‍nexgo‍.‍de‍>
    
    __inline                                        // reverses the byte order of ul using inline assembly
    unsigned htonl(unsigned ul)
    {
    #if _MSC_VER >= 1900                            // compiler version 19.00 and later: one MOVBE instruction
    	__asm	movbe	eax, ul
    #else                                           // older compilers: load, then BSWAP
    	__asm	mov	eax, ul
    	__asm	bswap	eax
    #endif
    }                                               // no return statement: the inline assembly leaves the result in EAX
    
    int main(int argc)                              // demo: applies htonl() to argc and to every element of a local array
    {
        unsigned array[] = {'MSFT', 'MSVC'};        // two multi-character constants
    
        argc = htonl(argc);                         // this result is overwritten by the loop initialisation below
    
        for (argc = 0; argc < sizeof(array) / sizeof(*array); argc++)
            array[argc] = htonl(array[argc]);       // in-place byte swap of each element
    }
    
  2. Generate the assembly listing example2.asm and the object file example2.obj from the source file example2.c created in step 1., using the Visual C 2015 compiler for the x86 processor architecture:

    CL.EXE /Bv /c /Fa /Ox /Tcexample2.c
    Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 19.13.26129.01 for 80x86
    Copyright (C) Microsoft Corporation.  All rights reserved.
    
    Compiler Passes:
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\cl.exe:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\c1.dll:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\c1xx.dll:      Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\c2.dll:        Version 19.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\link.exe:      Version 14.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\mspdb140.dll:  Version 14.13.26129.0
     C:\Program Files\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.13.26128\bin\Hostx86\x86\1033\clui.dll: Version 19.13.26129.0
    
    example2.c
    
  3. Display the assembly listing example2.asm created in step 2.:

    Type example2.asm
    ; Listing generated by Microsoft (R) Optimizing Compiler Version 19.13.26129.0 
    
    	TITLE	C:\Users\Stefan\Desktop\example2.c
    	.686P
    	.XMM
    	include	listing.inc
    	.model	flat
    
    INCLUDELIB LIBCMT
    INCLUDELIB OLDNAMES
    
    PUBLIC	_htonl
    PUBLIC	_main
    
    ; Function compile flags: /Ogtpy
    ;	COMDAT	_htonl
    _TEXT	SEGMENT
    ; _htonl (x86, compile flags /Ogtpy): loads the argument with MOVBE, which
    ; reverses its byte order, and returns the result in EAX.
    _ul$ = 8						; size = 4
    _htonl	PROC						; COMDAT
    ; File c:\users\stefan\desktop\example2.c
    ; Line 7
    	movbe	eax, DWORD PTR _ul$[esp-4]
    ; Line 12
    	ret	0
    _htonl	ENDP
    _TEXT	ENDS
    
    ; Function compile flags: /Ogtpy
    _TEXT	SEGMENT
    ; _main (x86, compile flags /Ogtpy, compiler version 19.13): initialises the
    ; two-element array on the stack and byte-swaps each element in a loop with
    ; htonl() expanded inline. _ul$ and _argc$ share the same stack offset.
    _array$ = -8						; size = 8
    _ul$ = 8						; size = 4
    _argc$ = 8						; size = 4
    _main	PROC
    ; File c:\users\stefan\desktop\example2.c
    ; Line 15
    	sub	esp, 8
    ; Line 16
    	mov	DWORD PTR _array$[esp+8], 1297303124	; 4d534654H
    	mov	DWORD PTR _array$[esp+12], 1297307203	; 4d535643H
    ; Line 18
    ; EAX is overwritten inside the loop before this value is read
    	movbe	eax, DWORD PTR _argc$[esp+4]
    ; Line 20
    ; ECX is the loop index
    	xor	ecx, ecx
    	npad	6
    $LL4@main:
    ; Line 21
    ; the element is copied through the stack slot _ul$ before it is byte-swapped
    	mov	eax, DWORD PTR _array$[esp+ecx*4+8]
    	mov	DWORD PTR _ul$[esp+4], eax
    	movbe	eax, DWORD PTR _ul$[esp+4]
    ; NOTE(review): the next MOVBE loads and swaps the array element directly and
    ; makes the three instructions above redundant. It is presumably the author's
    ; suggested replacement for them, whose deletion/insertion markup was lost in
    ; this text rendering -- confirm against the original page.
    	movbe	eax, DWORD PTR _array$[esp+ecx*4+8]
    	mov	DWORD PTR _array$[esp+ecx*4+8], eax
    	inc	ecx
    	cmp	ecx, 2
    	jb	SHORT $LL4@main
    ; Line 22
    	add	esp, 8
    	ret	0
    _main	ENDP
    _TEXT	ENDS
    END
    
    Notice the superfluous in(s)ane transfer of the EAX register to and from the (intermediate) variable _ul$ generated for line 21!
    Also notice that the superfluous instruction generated for line 18 uses no superfluous intermediate variable!
  4. Generate the assembly listing example2.asm and the object file example2.obj from the source file example2.c created in step 1., using the Visual C 2010 compiler for the x86 processor architecture:

    CL.EXE /Bv /c /Fa /Ox /Tcexample2.c
    Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for 80x86
    Copyright (C) Microsoft Corporation.  All rights reserved.
    
    Compiler Passes:
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\cl.exe:        Version 16.00.40219.1
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\c1.dll:        Version 16.00.40219.400
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\c1xx.dll:      Version 16.00.40219.400
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\c2.dll:        Version 16.00.40219.449
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\link.exe:      Version 10.00.40219.386
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\mspdb100.dll:  Version 10.00.40219.478
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\1033\clui.dll: Version 16.00.40219.1
    
    example2.c
    
  5. Display the assembly listing example2.asm created in step 4.:

    Type example2.asm
    ; Listing generated by Microsoft (R) Optimizing Compiler Version 16.00.40219.449 
    
    	TITLE	C:\Users\Stefan\Desktop\example2.c
    	.686P
    	.XMM
    	include	listing.inc
    	.model	flat
    
    INCLUDELIB LIBCMT
    INCLUDELIB OLDNAMES
    
    PUBLIC	_htonl
    ; Function compile flags: /Ogtpy
    ;	COMDAT	_htonl
    _TEXT	SEGMENT
    ; _htonl (x86, compile flags /Ogtpy, compiler version 16.00): loads the
    ; argument into EAX, reverses its byte order with BSWAP and returns it in EAX.
    _ul$ = 8						; size = 4
    _htonl	PROC						; COMDAT
    ; File c:\users\stefan\desktop\example2.c
    ; Line 9
    	mov	eax, DWORD PTR _ul$[esp-4]
    ; Line 10
    	bswap	eax
    ; Line 11
    	ret	0
    _htonl	ENDP
    _TEXT	ENDS
    PUBLIC	_main
    ; Function compile flags: /Ogtpy
    ;	COMDAT	_main
    _TEXT	SEGMENT
    ; _main (x86, compile flags /Ogtpy, compiler version 16.00): initialises the
    ; two-element array on the stack and byte-swaps each element in a loop with
    ; htonl() expanded inline. $T1087 and _argc$ share the same stack offset.
    _array$ = -8						; size = 8
    $T1087 = 8						; size = 4
    _argc$ = 8						; size = 4
    _main	PROC						; COMDAT
    ; Line 15
    	push	ebp
    	mov	ebp, esp
    ; the two pushes reserve the 8 bytes used by the array
    	push	ecx
    	push	ecx
    ; Line 16
    	mov	DWORD PTR _array$[ebp], 1297303124	; 4d534654H
    	mov	DWORD PTR _array$[ebp+4], 1297307203	; 4d535643H
    ; Line 18
    ; EAX is overwritten inside the loop before this value is read
    	mov	eax, DWORD PTR _argc$[ebp]
    	bswap	eax
    ; Line 20
    ; EDX is the loop index, ECX the address of the current element
    	xor	edx, edx
    $LL3@main:
    	lea	ecx, DWORD PTR _array$[ebp+edx*4]
    ; Line 21
    ; the element is stored to the temporary $T1087 and reloaded from it before BSWAP
    	mov	eax, DWORD PTR [ecx]
    	mov	DWORD PTR $T1087[ebp], eax
    	mov	eax, DWORD PTR $T1087[ebp]
    	bswap	eax
    	inc	edx
    	mov	DWORD PTR [ecx], eax
    	cmp	edx, 2
    	jb	SHORT $LL3@main
    ; Line 22
    	leave
    	ret	0
    _main	ENDP
    _TEXT	ENDS
    END
    
    Notice the superfluous in(s)ane transfer of the EAX register to and from the intermediate variable $T1087 generated for line 21!
    Again notice that the superfluous instructions generated for line 18 use no superfluous intermediate variable!

Example 3

Demonstration

  1. Create the text file example3.c with the following content in an arbitrary, preferably empty directory:

    // Copyright © 2018, Stefan Kanthak <‍stefan‍.‍kanthak‍@‍nexgo‍.‍de‍>
    
    #define STRICT
    #define UNICODE
    #define WIN32_LEAN_AND_MEAN
    
    #include <windows.h>
    #include <unknwn.h>
    
    #define IF2CO(class, member, interface)	(&((class *) 0)->member == interface, \
    					 ((class *) (((char *) interface) - (size_t) &(((class *) 0)->member))))
    
    extern	const	GUID	CLSID_NULL;
    
    extern	DWORD	dwCount;
    
    typedef	struct	_CUnknown
    {
    	DWORD		dwCount;
    
    	IUnknown	Unknown;
    } CUnknown;
    
    HRESULT	WINAPI	Unknown_QueryInterface(IUnknown *this, REFIID rIID, VOID **ppv)	// hands out the IUnknown interface only
    {
    	CUnknown	*that = IF2CO(CUnknown, Unknown, this);	// enclosing CUnknown: interface pointer minus member offset
    
    	if (ppv == NULL)
    		return E_POINTER;
    
    	*ppv = NULL;				// result stays NULL on every failure path below
    
    	if (rIID == NULL)
    		return E_INVALIDARG;
    
    	if (!IsEqualIID(rIID, &IID_IUnknown))
    		return E_NOINTERFACE;
    
    	*ppv = &that->Unknown;
    
    	_InterlockedIncrement(&that->dwCount);	// count the reference handed out through *ppv
    
    	return S_OK;
    }
    
    DWORD	WINAPI	Unknown_AddRef(IUnknown *this)	// increments the object's counter and returns the new value
    {
    	CUnknown	*that = IF2CO(CUnknown, Unknown, this);	// enclosing CUnknown: interface pointer minus member offset
    
    	return _InterlockedIncrement(&that->dwCount);
    }
    
    DWORD	WINAPI	Unknown_Release(IUnknown *this)	// decrements the object's counter; frees the object when it reaches 0
    {
    	CUnknown	*that = IF2CO(CUnknown, Unknown, this);	// enclosing CUnknown: interface pointer minus member offset
    	DWORD		dw = _InterlockedDecrement(&that->dwCount);
    
    	if (dw != 0L)
    		return dw;			// counter still non-zero: object stays alive
    
    	_InterlockedDecrement(&dwCount);	// the external counter dwCount is decremented as well
    
    	CoTaskMemFree(that);
    
    	return 0L;
    }
    
    const	IUnknownVtbl	Unknown_Vtbl = {Unknown_QueryInterface, Unknown_AddRef, Unknown_Release};
    
    Note: this ANSI C source is a minimal implementation of the IUnknown interface.
  2. Generate the assembly listing example3.asm and the object file example3.obj from the source file example3.c created in step 1., using the Visual C 2010 compiler for the x86 processor architecture:

    CL.EXE /Bv /c /Fa /O1is /Tcexample3.c
    Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for 80x86
    Copyright (C) Microsoft Corporation.  All rights reserved.
    
    Compiler Passes:
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\cl.exe:        Version 16.00.40219.1
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\c1.dll:        Version 16.00.40219.400
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\c1xx.dll:      Version 16.00.40219.400
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\c2.dll:        Version 16.00.40219.449
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\link.exe:      Version 10.00.40219.386
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\mspdb100.dll:  Version 10.00.40219.478
     C:\Program Files\Microsoft Visual Studio 10.0\VC\Bin\1033\clui.dll: Version 16.00.40219.1
    
    example3.c
    
  3. Display the assembly listing example3.asm created in step 2.:

    Type example3.asm
    ; Listing generated by Microsoft (R) Optimizing Compiler Version 16.00.40219.449 
    
    	TITLE	C:\Users\Stefan\Desktop\example3.c
    	.686P
    	.XMM
    	include	listing.inc
    	.model	flat
    
    INCLUDELIB LIBCMT
    INCLUDELIB OLDNAMES
    
    PUBLIC	_Unknown_Release@4
    PUBLIC	_Unknown_AddRef@4
    PUBLIC	_Unknown_QueryInterface@12
    PUBLIC	_Unknown_Vtbl
    CONST	SEGMENT
    _Unknown_Vtbl DD FLAT:_Unknown_QueryInterface@12
    	DD	FLAT:_Unknown_AddRef@4
    	DD	FLAT:_Unknown_Release@4
    CONST	ENDS
    EXTRN	_IID_IUnknown:BYTE
    ; Function compile flags: /Ogspy
    ;	COMDAT	_Unknown_QueryInterface@12
    _TEXT	SEGMENT
    ; _Unknown_QueryInterface@12 (x86, compile flags /Ogspy): validates ppv and
    ; rIID, compares 16 bytes at rIID with IID_IUnknown and on a match stores the
    ; interface pointer into *ppv and increments the object's counter.
    ; Returns the HRESULT in EAX and removes its 12 bytes of arguments.
    _this$ = 8						; size = 4
    _rIID$ = 12						; size = 4
    _ppv$ = 16						; size = 4
    _Unknown_QueryInterface@12 PROC				; COMDAT
    ; File c:\users\stefan\desktop\example3.c
    ; Line 26
    	mov	edx, DWORD PTR _this$[esp-4]
    ; Line 28
    ; EAX = ppv; EDX = this - 4, the address of the enclosing object
    	mov	eax, DWORD PTR _ppv$[esp-4]
    	add	edx, -4					; fffffffcH
    	test	eax, eax
    	jne	SHORT $LN3@Unknown_Qu
    ; Line 29
    	mov	eax, -2147467261			; 80004003H
    	jmp	SHORT $LN4@Unknown_Qu
    $LN3@Unknown_Qu:
    ; Line 31
    ; *ppv = NULL
    	and	DWORD PTR [eax], 0
    	push	esi
    ; Line 33
    	mov	esi, DWORD PTR _rIID$[esp]
    	test	esi, esi
    	jne	SHORT $LN2@Unknown_Qu
    ; Line 34
    	mov	eax, -2147024809			; 80070057H
    	jmp	SHORT $LN7@Unknown_Qu
    $LN2@Unknown_Qu:
    ; EBX is saved here, cleared and restored below, but no instruction in this
    ; procedure reads it; this is the register use the article points out.
    	push	ebx
    	push	edi
    ; Line 36
    ; ECX = 4 doublewords: REPE CMPSD compares the 16 bytes at ESI (rIID) with IID_IUnknown
    	push	4
    	pop	ecx
    	xor	ebx, ebx
    	mov	edi, OFFSET _IID_IUnknown
    	repe	cmpsd
    ; the two POPs do not change the flags, so JE still tests the result of the comparison
    	pop	edi
    	pop	ebx
    	je	SHORT $LN1@Unknown_Qu
    ; Line 37
    	mov	eax, -2147467262			; 80004002H
    	jmp	SHORT $LN7@Unknown_Qu
    $LN1@Unknown_Qu:
    ; Line 39
    ; *ppv = address of the interface member (object address + 4)
    	lea	ecx, DWORD PTR [edx+4]
    	mov	DWORD PTR [eax], ecx
    ; Line 41
    ; atomically add 1 to the counter at the start of the object
    	xor	eax, eax
    	inc	eax
    	lock	xadd DWORD PTR [edx], eax
    ; Line 43
    	xor	eax, eax
    $LN7@Unknown_Qu:
    	pop	esi
    $LN4@Unknown_Qu:
    ; Line 44
    	ret	12					; 0000000cH
    _Unknown_QueryInterface@12 ENDP
    ; Function compile flags: /Ogspy
    _TEXT	ENDS
    ;	COMDAT	_Unknown_AddRef@4
    _TEXT	SEGMENT
    ; _Unknown_AddRef@4 (x86, compile flags /Ogspy): atomically increments the
    ; counter located 4 bytes before the interface pointer and returns the new
    ; value in EAX; removes its 4 bytes of arguments.
    _this$ = 8						; size = 4
    _Unknown_AddRef@4 PROC					; COMDAT
    ; Line 48
    	mov	ecx, DWORD PTR _this$[esp-4]
    ; Line 50
    ; LOCK XADD leaves the previous counter value in EAX; the final INC turns it into the new value
    	xor	eax, eax
    	add	ecx, -4					; fffffffcH
    	inc	eax
    	lock	xadd DWORD PTR [ecx], eax
    	inc	eax
    ; Line 51
    	ret	4
    _Unknown_AddRef@4 ENDP
    _TEXT	ENDS
    EXTRN	__imp__CoTaskMemFree@4:PROC
    EXTRN	_dwCount:DWORD
    ; Function compile flags: /Ogspy
    ;	COMDAT	_Unknown_Release@4
    _TEXT	SEGMENT
    ; _Unknown_Release@4 (x86, compile flags /Ogspy): atomically decrements the
    ; counter located 4 bytes before the interface pointer. A non-zero result is
    ; returned in EAX; otherwise the global _dwCount is decremented too, the
    ; object is passed to CoTaskMemFree and 0 is returned.
    _this$ = 8						; size = 4
    _Unknown_Release@4 PROC					; COMDAT
    ; Line 55
    ; ECX = this - 4, the address of the enclosing object
    	mov	ecx, DWORD PTR _this$[esp-4]
    	add	ecx, -4					; fffffffcH
    ; Line 56
    ; LOCK XADD with -1 leaves the previous counter value in EAX; DEC yields the
    ; new value and sets the flags tested by the JNE below
    	mov	edx, ecx
    	or	eax, -1
    	lock	xadd DWORD PTR [edx], eax
    	dec	eax
    ; Line 59
    	jne	SHORT $LN2@Unknown_Re
    ; Line 61
    ; atomically add -1 to the global _dwCount; the previous value is not used
    	mov	eax, OFFSET _dwCount
    	or	edx, -1
    	lock	xadd DWORD PTR [eax], edx
    ; Line 63
    ; ECX still holds the object address and is passed as the only argument
    	push	ecx
    	call	DWORD PTR __imp__CoTaskMemFree@4
    ; Line 65
    	xor	eax, eax
    $LN2@Unknown_Re:
    ; Line 66
    	ret	4
    _Unknown_Release@4 ENDP
    _TEXT	ENDS
    END
    
    Notice the in(s)ane use of the EBX register around the inlined memcmp() function.

Contact

If you miss anything here, have additions, comments, corrections, criticism or questions, want to give feedback, hints or tips, report broken links, bugs, errors, inaccuracies, omissions, vulnerabilities or weaknesses, …:
don’t hesitate to contact me and feel free to ask, comment, criticise, flame, notify or report!

Use the X.509 certificate to send S/MIME encrypted mail.

Notes: I dislike HTML (and even weirder formats too) in email, I prefer to receive plain text.
I also expect to see your full (real) name as sender, not your nickname!
Emails in weird formats and without a proper sender name are likely to be discarded.
I abhor top posts and expect inline quotes in replies.

Terms and Conditions

By using this site, you signify your agreement to these terms and conditions. If you do not agree to these terms and conditions, do not use this site!

Data Protection Declaration

This web page records no data and sets no cookies.

The service provider for *.homepage.t-online.de, Deutsche Telekom AG,


Copyright © 1995–2018 • Stefan Kanthak • <‍stefan‍.‍kanthak‍@‍nexgo‍.‍de‍>