What is rdtsc
- RDTSC stands for ReaD Time-Stamp Counter
- Limitation
- the rdtsc instruction only works on x86 platforms
/*
 * Read the x86 time-stamp counter with the RDTSC instruction.
 *
 * The instruction's result is picked up from two 32-bit registers: the
 * "=a" output (EAX) holds the low half and the "=d" output (EDX) holds the
 * high half. Both halves are merged into a single 64-bit tick count.
 */
uint64_t rdtsc()
{
    uint32_t eax, edx;
    uint64_t ticks;

    __asm__ __volatile__ ("rdtsc" : "=a"(eax), "=d"(edx));

    ticks = edx;        /* high 32 bits */
    ticks <<= 32;
    ticks |= eax;       /* low 32 bits */
    return ticks;
}
rdtsc.h
#ifndef __RDTSC_H_DEFINED__
#define __RDTSC_H_DEFINED__
#if defined(__i386__)
/*
 * 32-bit x86 variant: read the time-stamp counter.
 *
 * The instruction is emitted as raw opcode bytes (0x0f 0x31 encodes RDTSC).
 * On i386 the "=A" constraint ties the 64-bit output to the EDX:EAX register
 * pair, so the whole counter value lands in a single variable.
 */
static __inline__ unsigned long long rdtsc(void)
{
    unsigned long long ticks;

    __asm__ __volatile__ (".byte 0x0f, 0x31" : "=A" (ticks));
    return ticks;
}
#elif defined(__x86_64__)
/*
 * x86-64 variant: read the time-stamp counter.
 *
 * RDTSC's low 32 bits are taken from the "=a" output (EAX) and its high
 * 32 bits from the "=d" output (EDX); the halves are then combined into one
 * 64-bit value.
 */
static __inline__ unsigned long long rdtsc(void)
{
    unsigned eax, edx;
    unsigned long long ticks;

    __asm__ __volatile__ ("rdtsc" : "=a"(eax), "=d"(edx));

    ticks = edx;                    /* high half */
    ticks = (ticks << 32) | eax;    /* append low half */
    return ticks;
}
#elif defined(__powerpc__)
/*
 * PowerPC variant: return a 64-bit tick count built from the time base.
 *
 * mftbu / mftb are the PowerPC "move from time base" instructions for the
 * upper and lower halves. The halves are read separately, so the upper half
 * is read twice (before and after the lower half) and the whole sequence is
 * retried until both upper reads agree; otherwise a carry from the lower
 * half between the reads could produce a mismatched pair.
 */
static __inline__ unsigned long long rdtsc(void)
{
unsigned long long int result=0;
unsigned long int upper, lower,tmp;
/* %0 = upper (first read), %1 = lower, %2 = tmp (upper, second read) */
__asm__ volatile(
"0: \n"
"\tmftbu %0 \n"
"\tmftb %1 \n"
"\tmftbu %2 \n"
"\tcmpw %2,%0 \n"
"\tbne 0b \n"
: "=r"(upper),"=r"(lower),"=r"(tmp)
);
/* Assemble the 64-bit value: upper half in the high 32 bits, OR in lower. */
result = upper;
result = result<<32;
result = result|lower;
return(result);
}
#else
#error "No tick counter is available!"
#endif
/* $RCSfile: $ $Author: kazutomo $
* $Revision: 1.6 $ $Date: 2005/04/13 18:49:58 $
*/
#endif
Sample one
#include <stdio.h>
#include "rdtsc.h"
/*
 * Sample one: take two back-to-back rdtsc() readings and print the
 * difference between them in ticks.
 */
int main(int argc, char* argv[])
{
    unsigned long long before = rdtsc();
    unsigned long long after = rdtsc();

    printf("%llu\n", after - before);
    return 0;
}
Sample two
#include <stdio.h>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "rdtsc.h"
#define N (1024*1024*2)
/*
 * Sample two: time a one-byte store to each of the N bytes of a heap buffer
 * with rdtsc() and print the smallest and the largest tick delta observed.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(int argc, char* argv[])
{
    unsigned long long a, b;
    unsigned long long min, max;
    /* volatile: the buffer is never read back, so a plain store would be a
     * dead store the optimizer is free to discard. */
    volatile char* p;
    int i;

    (void)argc;
    (void)argv;

    p = (volatile char*)malloc(N);
    /* Explicit check instead of assert(): assert() is compiled out when
     * NDEBUG is defined, which would leave a NULL dereference below. */
    if (p == NULL) {
        fprintf(stderr, "malloc of %d bytes failed\n", N);
        return EXIT_FAILURE;
    }

    max = 0;
    min = UINT64_MAX;
    for (i = 0; i < N; i++) {
        a = rdtsc();
        p[i] = 0;
        b = rdtsc() - a;
        /* Two independent tests: with "else if", a sample that raised max
         * was never considered for min, so min could be left at its
         * initial sentinel or miss the true minimum. */
        if (b > max) {
            max = b;
        }
        if (b < min) {
            min = b;
        }
    }

    printf("min=%llu\n", min);
    printf("max=%llu\n", max);

    free((void*)p);
    return 0;
}
TODO: translate CPU cycles to time
time_in_seconds = number_of_clock_cycles / frequency
cat /proc/cpuinfo |grep 'MHz'
gettimeofday
and rdtsc
Things to consider:
- Context switch?
- Multi-core processors
gettimeofday
#include <iostream>
#include <sys/time.h> // for gettimeofday()
using namespace std;
// Measure a (placeholder) piece of work with gettimeofday() and print the
// elapsed time in milliseconds.
int main()
{
    struct timeval start, stop;

    gettimeofday(&start, NULL);     // start timer

    // do something
    // ...

    gettimeofday(&stop, NULL);      // stop timer

    // Whole seconds and the microsecond remainder are converted to
    // milliseconds separately and then summed.
    double elapsedMs = (stop.tv_sec - start.tv_sec) * 1000.0;   // sec to ms
    elapsedMs += (stop.tv_usec - start.tv_usec) / 1000.0;       // us to ms

    std::cout << elapsedMs << " ms.\n";
    return 0;
}
gettimeofday vs rdtsc, source code
#include <iostream>
#include <sys/time.h> // for gettimeofday()
#include <stdlib.h> // for malloc
#include <string.h> // for memcpy
#include "rdtsc.h"
// Nominal CPU clock in MHz; look it up with: cat /proc/cpuinfo |grep 'MHz'
#define CPUMHZ 1200.0f
// Timestamp-counter ticks per microsecond. 1 MHz is 10^6 cycles per second,
// i.e. exactly one cycle per microsecond, so the value equals the MHz figure
// (MHz is a decimal unit; scaling by 1024*1024 overstated it by about 4.9%).
// NOTE(review): assumes the counter ticks at CPUMHZ - confirm for the target CPU.
#define CPU_SPEED_US (CPUMHZ)
using namespace std;
// Empty asm statement that takes `ptr` as an input and declares a memory
// clobber. It emits no instructions; it is there so the compiler has to
// treat the pointed-to memory as used, instead of deleting a timed
// malloc()/memcpy() whose result is otherwise never looked at.
static inline void keep_result(const void* ptr)
{
    __asm__ __volatile__("" : : "r"(ptr) : "memory");
}

// Microseconds elapsed between two gettimeofday() samples.
static double elapsed_us(const struct timeval& from, const struct timeval& to)
{
    double us = (to.tv_sec - from.tv_sec) * 1000000.0;  // sec to us
    us += (to.tv_usec - from.tv_usec);
    return us;
}

// Time one malloc(4) and `count` 1 KiB memcpy() calls twice each: once with
// gettimeofday() and once with rdtsc(), printing both results.
// In every section both figures cover the same amount of work, so the two
// clocks can be compared directly.
int main()
{
    struct timeval t1, t2;
    double elapsedTime;
    unsigned long long a, b;

    // test malloc
    std::cout << "malloc:\n";

    gettimeofday(&t1, NULL);
    char* p = (char*)malloc(4);
    gettimeofday(&t2, NULL);
    keep_result(p);     // make the allocation observable
    free(p);
    elapsedTime = elapsed_us(t1, t2);
    std::cout << "\t" << elapsedTime << " us (by gettimeofday)\n";

    a = rdtsc();
    char* q = (char*)malloc(4);
    b = rdtsc() - a;
    keep_result(q);
    free(q);
    std::cout << "\ttimestamp count is " << b << ", elapsedTime " << b/CPU_SPEED_US << " us\n";

    // test memcpy
    std::cout << "memcpy:\n";
    char tmp1[1024] = {0};
    char tmp2[1024] = {'a'};
    int count = 1000;

    gettimeofday(&t1, NULL);
    for (int i = 0; i < count; i++) {
        memcpy(tmp1, tmp2, sizeof(tmp1));
        keep_result(tmp1);  // tmp1 is never read, keep each copy alive
    }
    gettimeofday(&t2, NULL);
    elapsedTime = elapsed_us(t1, t2);
    std::cout << "\t" << elapsedTime << " us (by gettimeofday)\n";

    a = rdtsc();
    for (int i = 0; i < count; i++) {
        memcpy(tmp1, tmp2, sizeof(tmp1));
        keep_result(tmp1);
    }
    // Total ticks for all `count` copies, matching the gettimeofday figure
    // above. Dividing by count here made the two numbers incomparable and
    // truncated small per-copy values to 0.
    b = rdtsc() - a;
    std::cout << "\ttimestamp count is " << b << ", elapsedTime " << b/CPU_SPEED_US << " us\n";

    return 0;
}
output
dennis@dennis:~/tt$ g++ -O3 -o t t.cc
dennis@dennis:~/tt$ ./t
malloc:
0 us (by gettimeofday)
timestamp count is 58, elapsedTime 0.0460943 us
memcpy:
0 us (by gettimeofday)
timestamp count is 0, elapsedTime 0 us
dennis@dennis:~/tt$ g++ -O2 -o t t.cc
dennis@dennis:~/tt$ ./t
malloc:
0 us (by gettimeofday)
timestamp count is 69, elapsedTime 0.0548363 us
memcpy:
0 us (by gettimeofday)
timestamp count is 0, elapsedTime 0 us
dennis@dennis:~/tt$ g++ -O1 -o t t.cc
dennis@dennis:~/tt$ ./t
malloc:
0 us (by gettimeofday)
timestamp count is 58, elapsedTime 0.0460943 us
memcpy:
2 us (by gettimeofday)
timestamp count is 3, elapsedTime 0.00238419 us
dennis@dennis:~/tt$ g++ -o t t.cc
dennis@dennis:~/tt$ ./t
malloc:
80 us (by gettimeofday)
timestamp count is 1198, elapsedTime 0.952085 us
memcpy:
102 us (by gettimeofday)
timestamp count is 295, elapsedTime 0.234445 us
dennis@dennis:~/tt$ g++ -g -o t t.cc
dennis@dennis:~/tt$ ./t
malloc:
82 us (by gettimeofday)
timestamp count is 1421, elapsedTime 1.12931 us
memcpy:
timestamp count is 295, elapsedTime 0.234445 us
Reference