Tag Archives: QueryPerformanceFrequency

C++ Speed Test with FPU and ints

I wanted to test the difference on modern hardware between floating-point math and integer math. Here is my code (which is similar to C# code I had written previously).

// CSpeedTest.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <windows.h>
#include <iomanip>
#include <iostream>

using namespace std;

#define FP_MULT
//#define FP_NEG
//#define INT_MULT
//#define INT_NEG

// Accumulating stopwatch built on the Win32 performance counter.
//
// Each Start()/Stop() pair adds its interval to a running total, so many
// short intervals can be summed. ElapsedTime() reports that total in seconds.
class StopWatch
{
    LARGE_INTEGER m_freq;       // performance-counter frequency (counts per second)
    LARGE_INTEGER m_startTime;  // counter value captured by the most recent Start()
    LONGLONG m_totalTime;       // counts accumulated over all completed intervals
    bool m_running;             // true between a Start() and its matching Stop()
public:
    // Creates a stopped stopwatch with zero accumulated time.
    StopWatch() : m_totalTime(0), m_running(false)
    {
        // Give the start stamp a defined value even if Start() is never called.
        m_startTime.QuadPart = 0;
        QueryPerformanceFrequency(&m_freq);
    }

    // Begins (or restarts) an interval at the current counter value.
    void Start()
    {
        m_running = true;
        // Read the counter last so the bookkeeping above is not part of the interval.
        QueryPerformanceCounter(&m_startTime);
    }

    // Ends the current interval and adds it to the total.
    // Does nothing if no interval is in progress, so a stray or repeated
    // Stop() cannot add a bogus or duplicate interval.
    void Stop()
    {
        // Read the counter first so the check below is not part of the interval.
        LARGE_INTEGER stopTime;
        QueryPerformanceCounter(&stopTime);

        if (!m_running)
        {
            return;
        }

        m_totalTime += (stopTime.QuadPart - m_startTime.QuadPart);
        m_running = false;
    }

    // Clears the accumulated total; an interval already in progress keeps running.
    void Reset()
    {
        m_totalTime = 0;
    }

    // Returns the accumulated time of all completed intervals, in seconds.
    double ElapsedTime() const
    {
        return static_cast<double>(m_totalTime) / static_cast<double>(m_freq.QuadPart);
    }
};

// Times one billion repetitions of the operation selected by the FP_MULT /
// FP_NEG / INT_MULT / INT_NEG macro and prints the accumulated time as
// MM:SS:hh, where hh is hundredths of a second.
int _tmain(int argc, _TCHAR* argv[])
{
    // The operand type follows the selected test. It is volatile so that
    // every read and write of poo stays in the generated code.
    #if defined(FP_MULT) || defined(FP_NEG)
    volatile double poo = 0.0;
    #endif
    #if defined(INT_MULT) || defined(INT_NEG)
    volatile int poo = 0;
    #endif

    StopWatch stopWatch;
    for (int idx = 0; idx < 1000000000; idx++)
    {
      // Only the single operation under test sits between Start() and Stop().
      stopWatch.Start();
      #if defined(FP_MULT)
        poo = -1.0 * poo;
      #endif
      #if defined(FP_NEG) || defined(INT_NEG)
        poo = -poo;
      #endif
      #if defined(INT_MULT)
        poo = -1 * poo;
      #endif
      stopWatch.Stop();
    }

    const double elapsedTime = stopWatch.ElapsedTime();

    // Split the total into minutes, seconds and hundredths of a second.
    const int wholeSeconds = static_cast<int>(elapsedTime);
    const int minutes = static_cast<int>(elapsedTime / 60);
    const int seconds = wholeSeconds % 60;
    const int ms10 = static_cast<int>((elapsedTime - wholeSeconds) * 100);

    // setw() affects only the next inserted field, so it is repeated for each
    // one; otherwise 7.05 s would print as "00:7:5".
    cout << setfill('0')
         << setw(2) << minutes << ':'
         << setw(2) << seconds << ':'
         << setw(2) << ms10 << endl;

    return 0;
}

The code was compiled as a console application for Win32 Debug so the variables would get “registered”.

The test machine is a Dell Precision M4800. The processor is an Intel Core i7-4800MQ CPU at 2.70 GHz with 16 GB of RAM. The OS is Windows 7 Professional 64-bit with SP1.

Here are the results. I have also included the assembler for the operation under test.

define time assembly
FP_MULT 7.32s fld qword ptr [__real@bff0000000000000 (0BE7938h)]; fmul qword ptr [poo]; fstp qword ptr [poo]
FP_NEG 7.56s fld qword ptr [poo]; fchs; fstp qword ptr [poo]
INT_MULT 7.58s mov eax,dword ptr [poo]; imul eax,eax,0FFFFFFFFh; mov dword ptr [poo],eax
INT_NEG 7.59s mov eax,dword ptr [poo]; neg eax; mov dword ptr [poo],eax

I don’t believe I have accomplished much, as the setup to call the timing functions actually takes many, many more opcodes than the operation under test. However, this was an interesting experiment, and I do now have a cool C++ stopwatch on Windows for more extensive testing on much larger blocks of test code.