
#include <stdio.h>

#ifdef LINUX
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#else
#include <windows.h>
#endif

/* When this is defined together with LINUX, the clock speed is taken from
   /proc/cpuinfo (cpu_clock) instead of being measured against a timer
   (determine_cpu_clock_speed). */
#define USE_PROC_CPUINFO_ON_LINUX

/* Minimum number of reference-timer units that must elapse during one
   clock-speed sample in determine_cpu_clock_speed. On Linux it is compared
   with a tv_usec difference, on Windows with a QueryPerformanceCounter
   LowPart difference. */
#ifdef LINUX
#define N_TICKS 100000
#else
#define N_TICKS 1000
#endif

/* Two 64-bit time stamp values, each stored as separate high and low 32-bit
   halves. In the Windows inline assembly below, g_time holds the time stamp
   read at the end of the previous profile_ call and p_time accumulates the
   time elapsed between profile_ calls. */
int p_time_hi=0,p_time_lo=0,g_time_hi=0,g_time_lo=0;

#ifndef LINUX
/* The CPUID (0F A2) and RDTSC (0F 31) instructions, emitted as raw opcode
   bytes inside inline assembly blocks. */
#define CPUID _asm _emit 0x0f _asm _emit 0xa2
#define RDTSC _asm _emit 0x0f _asm _emit 0x31

/*
 * Test whether the processor has a time stamp counter.
 *
 * There is no C return statement: the result is whatever the inline assembly
 * leaves in EAX. That is 0 when the CPUID instruction is not available,
 * otherwise bit 4 of the EDX value returned by CPUID function 1 (the TSC
 * feature flag), so the result is 0 or 1.
 */
static int has_time_stamp_counter (void)
{
_asm {
	/* EBX, ECX and EDX are overwritten below (CPUID changes all of them), so save them */
	push	ebx
	push	ecx
	push	edx

	/* try to toggle bit 21 (200000h, the ID flag) of EFLAGS:
	   EBX = current EFLAGS, EAX = EFLAGS with bit 21 inverted */
	pushfd
	mov	eax,200000h
	pop	ebx
	xor	eax,ebx
	/* write the modified value to EFLAGS and read EFLAGS back into EAX */
	push	eax
	popfd
	pushfd
	pop	eax
	
	/* EAX = bits that differ from the original EFLAGS; zero means that the
	   bit could not be changed, so CPUID is not supported and 0 is returned */
	xor	eax,ebx
	jz	no_cpuid_instruction

	/* CPUID function 1: feature flags are returned in EDX */
	mov	eax,1

	CPUID
	
	/* EAX = bit 4 of EDX */
	shr	edx,4
	mov	eax,1
	and	eax,edx

no_cpuid_instruction:
	/* restore the saved registers; EAX holds the result */
	pop	edx
	pop	ecx
	pop	ebx
	}
}

/*
 * Read the time stamp counter with RDTSC and store the 64-bit value in
 * *large_int_p: the low 32 bits at byte offset 0, the high 32 bits at byte
 * offset 4. ECX and EDX are preserved, EAX is overwritten.
 */
static void read_time_stamp_counter (LARGE_INTEGER *large_int_p)
{
_asm{
	push	ecx
	push	edx
	/* ECX = destination address */
	mov	ecx,large_int_p

	/* RDTSC returns the counter in EDX:EAX */
	RDTSC

	mov	dword ptr 0[ecx],eax
	mov	dword ptr 4[ecx],edx
	pop	edx
	pop	ecx
	}
}

/*
 * Time a loop of 100000 iterations twice, once with a call to the local
 * profile_ routine in every iteration and once without, and store both
 * results through large_int_p.
 *
 * large_int_p must point to at least 16 writable bytes (two 64-bit values,
 * low 32 bits first):
 *   offset 0: p_time after the loop that calls profile_ in every iteration
 *   offset 8: p_time after the loop without calls, measured by a single
 *             profile_ call before and after the loop
 *
 * profile_ adds (time stamp now - g_time) to p_time and then stores a fresh
 * time stamp in g_time, so p_time accumulates the time between the end of
 * one profile_ call and the start of the next.
 *
 * Uses and overwrites the globals p_time_hi/p_time_lo and g_time_hi/g_time_lo.
 */
extern void compute_profile_overhead (LARGE_INTEGER *large_int_p)
{
_asm {
	/* skip the embedded subroutine */
	jmp	compute_profile_overhead_

	/* subroutine: p_time += TSC - g_time; g_time = TSC (read again); preserves EAX and EDX */
profile_:
	push	eax
	push	edx

	RDTSC

	/* 64-bit subtract and add using the carry flag */
	sub	eax,g_time_lo
	sbb	edx,g_time_hi
	add	p_time_lo,eax
	adc	p_time_hi,edx

	/* second read: the new g_time is taken after the accounting above */
	RDTSC

	mov	g_time_hi,edx
	pop	edx
	mov	g_time_lo,eax
	pop	eax
	ret

compute_profile_overhead_:
	/* EAX = result address; EBX, ECX, EDX and EBP are saved and restored at the end */
	mov	eax,large_int_p
	push	ebx
	push	ecx
	push	edx
	push	ebp
	
	/* ECX = EDX = 0, EBX = iteration count */
	xor	ecx,ecx
	xor	edx,edx
	mov	ebx,100000
	
	/* initialise g_time with the current time stamp, then reset p_time to 0
	   (ECX and EDX are still zero here) */
	call	profile_
	mov	p_time_lo,ecx
	mov	p_time_hi,edx

	/* first loop: one profile_ call per iteration */
compute_profile_overhead_lp1:
	/* NOTE(review): EBP is not used afterwards; presumably this mimics the
	   instruction sequence of a real profiled call site - confirm */
	lea	ebp,p_time_lo
	call	profile_

	/* loop body; ECX and EDX stay 0 */
	add	ecx,ecx
	add	edx,edx
	
	sub	ebx,1
	jne	compute_profile_overhead_lp1

	/* store the accumulated p_time at offsets 0 and 4 */
	mov	ecx,p_time_lo
	mov	edx,p_time_hi
	mov	dword ptr 0[eax],ecx
	mov	dword ptr 4[eax],edx

	/* set up the second measurement in the same way */
	xor	ecx,ecx
	xor	edx,edx
	mov	ebx,100000
	
	call	profile_
	mov	p_time_lo,ecx
	mov	p_time_hi,edx

	/* second loop: same loop body, but without the lea and the call */
compute_profile_overhead_lp2:
	add	ecx,ecx
	add	edx,edx
	
	sub	ebx,1
	jne	compute_profile_overhead_lp2

	/* a single call adds the time spent in the whole loop to p_time */
	call	profile_

	/* store p_time at offsets 8 and 12 */
	mov	ecx,p_time_lo
	mov	edx,p_time_hi
	mov	dword ptr 8[eax],ecx
	mov	dword ptr 12[eax],edx

	pop	ebp
	pop	edx
	pop	ecx
	pop	ebx
	}
}

#else

/* On Linux a LARGE_INTEGER is a plain integer type, so the LINUX branches in
   this file use ordinary arithmetic and comparison operators on it. */
typedef long long LARGE_INTEGER;

/* On Linux these three routines are not defined in this file.
   NOTE(review): presumably they are implemented in a separate assembly file
   with the same contracts as the Windows inline assembly versions - confirm. */
extern int has_time_stamp_counter (void);
extern void read_time_stamp_counter (LARGE_INTEGER *);
extern void compute_profile_overhead (LARGE_INTEGER *);

#endif

/* float_div (a,b) computes a/b. On Linux it is an ordinary division
   expression; elsewhere it is a function written with x87 instructions. */
#ifdef LINUX
#define float_div(a,b) ((a)/(b))
#else
/* There is no C return statement: the quotient is left on top of the x87
   register stack, where the caller reads the double result. */
double float_div (double a,double b)
{
	_asm {
	/* push a, then divide the top of the stack by b */
	fld	a
	fdiv b
	}
}
#endif

/* Number of clock-speed samples taken by determine_cpu_clock_speed. */
#define N_TIME_SAMPLES 40
/* Number of overhead samples taken by determine_profile_overhead. */
#define N_PROFILE_SAMPLES 40

/*
 * Exchange the n bytes starting at p1 with the n bytes starting at p2.
 * The bytes are exchanged one at a time, from the lowest address upwards.
 */
static void swap_bytes (unsigned char *p1,unsigned char *p2,int n)
{
	int i;

	for (i=0; i!=n; ++i){
		unsigned char saved;

		saved=p1[i];
		p1[i]=p2[i];
		p2[i]=saved;
	}
}

/* Every later use of qsort in this file calls the local heap sort hsort,
   which takes the same four arguments, instead of the C library function. */
#define qsort hsort

/*
 * Sort an array in ascending order with heap sort.
 *
 *   a_            start of the array
 *   high          number of elements
 *   element_size  size of one element in bytes
 *   compare       returns <0, 0 or >0 when its first argument is smaller
 *                 than, equal to or larger than its second argument
 *
 * Each pass of the outer loop sifts one element down. While build_index is
 * above zero the passes build a max-heap from the middle of the array towards
 * the front; after that every pass moves the largest remaining element behind
 * the heap and shrinks the heap by one.
 */
static void hsort (void *a_,unsigned int high,unsigned int element_size,int compare (void*,void*))
{
	unsigned char *base;
	unsigned int heap_size,build_index;

	base=(unsigned char*)a_;
	heap_size=high;
	build_index=heap_size/2;

	while (heap_size>1){
		unsigned int parent,child;

		parent=build_index;
		for (child=2*parent+1; child<heap_size; child=2*parent+1){
			int only_child;

			/* the last heap element may be a left child without a sibling */
			only_child = child==heap_size-1;

			/* with two children, continue with the larger one */
			if (!only_child && compare (&base[element_size*child],&base[element_size*(child+1)])<0)
				++child;

			if (compare (&base[element_size*parent],&base[element_size*child])>=0)
				break;

			swap_bytes (&base[element_size*child],&base[element_size*parent],element_size);
			if (only_child)
				break;

			parent=child;
		}

		if (build_index>0)
			--build_index;
		else {
			/* the heap is complete: move its maximum to the final position */
			--heap_size;
			swap_bytes (&base[0],&base[element_size*heap_size],element_size);
		}
	}
}

/*
 * Three-way comparison of two doubles for sorting: returns -1 when *r1_p is
 * smaller than *r2_p, 1 when it is larger and 0 otherwise.
 */
static int double_compare (const double *r1_p,const double *r2_p)
{
	double lhs,rhs;

	lhs=*r1_p;
	rhs=*r2_p;

	return (lhs>rhs)-(lhs<rhs);
}

/*
 * Three-way comparison of two LARGE_INTEGER values for sorting: returns -1,
 * 0 or 1. Without LINUX the high parts are compared first and the low parts
 * only decide when the high parts are equal.
 */
static int compare_large_integer (const LARGE_INTEGER *large_int_p1,const LARGE_INTEGER *large_int_p2)
{
#ifdef LINUX
	return (*large_int_p1>*large_int_p2)-(*large_int_p1<*large_int_p2);
#else
	if (large_int_p1->HighPart!=large_int_p2->HighPart)
		return large_int_p1->HighPart<large_int_p2->HighPart ? -1 : 1;

	if (large_int_p1->LowPart!=large_int_p2->LowPart)
		return large_int_p1->LowPart<large_int_p2->LowPart ? -1 : 1;

	return 0;
#endif
}

#if defined (LINUX) && defined (USE_PROC_CPUINFO_ON_LINUX)
/*
 * Read the processor clock rate from /proc/cpuinfo.
 *
 * The file is scanned line by line for a line of the form
 *     cpu MHz : <digits>[.<digits>]
 * where the two words are matched without regard to case and spaces or tabs
 * may appear before "cpu", between the two words, and around the colon.
 *
 * Returns the number on the first matching line (the unit is whatever the
 * file reports, MHz according to the field name), or 0.0 when the file cannot
 * be opened or no such line is found.
 */
double cpu_clock (void)
{
	FILE *f;
	int c;

	f=fopen ("/proc/cpuinfo","r");
	if (f==NULL)
		return 0.0;

	do {
		/* skip leading blanks of the line */
		c=getc (f);
		while (c==' ' || c=='\t')
			c=getc (f);

		/* (c & ~0x20) maps a lower case ASCII letter to upper case; EOF never matches */
		if ((c & ~0x20)=='C' && (c=getc (f),(c & ~0x20)=='P') && (c=getc (f),(c & ~0x20)=='U')){
			c=getc (f);
			/* at least one blank must separate "cpu" from "MHz" */
			if (c==' ' || c=='\t'){
				do
					c=getc (f);
				while (c==' ' || c=='\t');
				if ((c & ~0x20)=='M' && (c=getc (f),(c & ~0x20)=='H') && (c=getc (f),(c & ~0x20)=='Z')){
					do
						c=getc (f);
					while (c==' ' || c=='\t');
					if (c==':'){
						do
							c=getc (f);
						while (c==' ' || c=='\t');

						if (c>='0' && c<='9'){
							char s[65];
							int i,max_i;

							/* never write past the buffer; excess digits are read but dropped */
							max_i=(int)sizeof (s)-1;

							i=0;
							do {
								if (i<max_i)
									s[i++]=(char)c;
								c=getc (f);
							} while (c>='0' && c<='9');
							if (c=='.'){
								if (i<max_i)
									s[i++]=(char)c;
								c=getc (f);
								while (c>='0' && c<='9'){
									if (i<max_i)
										s[i++]=(char)c;
									c=getc (f);
								}
							}
							s[i]='\0';

							fclose (f);

							return atof (s);
						}
					}
				}
			}
		}

		/* no match: skip the rest of this line */
		while (c!='\n' && c!=EOF)
			c=getc (f);
	} while (c=='\n');

	fclose (f);

	return 0.0;
}
#endif

#if !(defined (LINUX) && defined (USE_PROC_CPUINFO_ON_LINUX))
/*
 * Average the middle part of time_results, which must hold N_TIME_SAMPLES
 * values: the first N_TIME_SAMPLES>>2 and the last N_TIME_SAMPLES>>2 entries
 * are left out. The caller sorts the array first, so the smallest and the
 * largest samples are the ones ignored.
 */
static double compute_average_frequency (double time_results[])
{
	int first,last,i;
	double total;

	first=N_TIME_SAMPLES>>2;
	last=N_TIME_SAMPLES-first;

	total=0.0;
	i=first;
	while (i<last){
		total += time_results[i];
		++i;
	}

	return float_div (total,(double)(last-first));
}

/*
 * Measure the rate of the time stamp counter against a reference timer.
 *
 * frequency is the number of reference-timer units per second: the caller
 * passes 1000000 on Linux (the timer is the user CPU time from getrusage, in
 * microseconds) and the QueryPerformanceFrequency result on Windows (the
 * timer is QueryPerformanceCounter).
 *
 * N_TIME_SAMPLES samples are taken. Each sample is
 *     (time stamp counter difference / timer difference) * frequency
 * that is, time stamp counter ticks per second. The samples are sorted and
 * the average of the middle half is returned.
 */
static double determine_cpu_clock_speed (double frequency)
{
#ifdef LINUX
	struct timeval begin_time0,begin_time,end_time;
#else
	LARGE_INTEGER begin_time0,begin_time,end_time;
#endif
	LARGE_INTEGER tsc_begin_time,tsc_end_time;
	double time_results [N_TIME_SAMPLES];
	int n;

	for (n=0; n<N_TIME_SAMPLES; ++n){
#ifdef LINUX
		struct rusage rusage_struct;

		getrusage (RUSAGE_SELF,&rusage_struct);
		begin_time0=rusage_struct.ru_utime;
#else
		QueryPerformanceCounter (&begin_time0);
#endif
		/* poll until the timer value changes, so that the sample starts just
		   after a timer update; the time stamp counter is read directly
		   after every timer read */
		do {
#ifdef LINUX
			getrusage (RUSAGE_SELF,&rusage_struct);
			begin_time=rusage_struct.ru_utime;
#else
			QueryPerformanceCounter (&begin_time);
#endif
			read_time_stamp_counter (&tsc_begin_time);
#ifdef LINUX
		} while (begin_time0.tv_sec==begin_time.tv_sec && begin_time0.tv_usec==begin_time.tv_usec);
#else
		} while (begin_time0.HighPart==begin_time.HighPart && begin_time0.LowPart==begin_time.LowPart);
#endif

		/* poll until at least N_TICKS timer units have passed since begin_time */
		do {
#ifdef LINUX
			getrusage (RUSAGE_SELF,&rusage_struct);
			end_time=rusage_struct.ru_utime;
#else
			QueryPerformanceCounter (&end_time);
#endif
			read_time_stamp_counter (&tsc_end_time);

			/* empty delay loop between two polls; this n hides the sample index
			   NOTE(review): presumably this reduces the number of timer
			   queries - confirm */
			{
				int n;
			
				for (n=0; n<10000; ++n)
					;
			}
			/* continue while fewer than N_TICKS units have elapsed; the second
			   alternative handles a carry into the upper part (tv_sec or
			   HighPart) of exactly one */
#ifdef LINUX
		} while (		(end_time.tv_sec==begin_time.tv_sec)
					?	(end_time.tv_usec-begin_time.tv_usec<N_TICKS)
					:	(end_time.tv_sec==begin_time.tv_sec+1 && end_time.tv_usec+1000000-N_TICKS<begin_time.tv_usec));
#else
		} while (		(end_time.HighPart==begin_time.HighPart)
					?	(end_time.LowPart-begin_time.LowPart<N_TICKS)
					:	(end_time.HighPart==begin_time.HighPart+1 && end_time.LowPart-(unsigned)N_TICKS<begin_time.LowPart));
#endif

		/* tsc_end_time = elapsed time stamp counter ticks (64-bit subtraction with borrow) */
#ifdef LINUX
		tsc_end_time -= tsc_begin_time;
#else
		tsc_end_time.HighPart -= tsc_begin_time.HighPart;
		if (tsc_end_time.LowPart<tsc_begin_time.LowPart)
			--tsc_end_time.HighPart;
		tsc_end_time.LowPart -= tsc_begin_time.LowPart;
#endif

		/* end_time = elapsed timer units, with a borrow from the upper part if needed */
#ifdef LINUX
		end_time.tv_sec -= begin_time.tv_sec;
		if (end_time.tv_usec<begin_time.tv_usec){
			--end_time.tv_sec;
			end_time.tv_usec = end_time.tv_usec+1000000-begin_time.tv_usec;
		} else
			end_time.tv_usec -= begin_time.tv_usec;
#else
		end_time.HighPart -= begin_time.HighPart;
		if (end_time.LowPart<begin_time.LowPart)
			--end_time.HighPart;
		end_time.LowPart -= begin_time.LowPart;
#endif
		/* only the low parts of the differences are used: tv_usec on Linux
		   (tv_sec is ignored), LowPart on Windows (HighPart is ignored) */
#ifdef LINUX
		time_results[n]=(float_div ((double)tsc_end_time,(double)end_time.tv_usec))*frequency;
#else
		time_results[n]=(float_div ((double)tsc_end_time.LowPart,(double)end_time.LowPart))*frequency;
#endif

		/* disabled debug output of the individual samples */
#if 0
# ifdef LINUX
		printf ("%-8d %-8d %-8d %-8d %g\n",
					(long)(((unsigned long long)(tsc_end_time-(unsigned long)tsc_end_time))>>32),(unsigned long)tsc_end_time,
					end_time.tv_sec,end_time.tv_usec,
					time_results[n]);
# else
		printf ("%-8d %-8d %-8d %-8d %g\n",
					tsc_end_time.HighPart,tsc_end_time.LowPart,
					end_time.HighPart,end_time.LowPart,
					time_results[n]);
# endif
#endif
	}

	/* qsort is redefined to hsort earlier in this file */
	qsort (time_results,N_TIME_SAMPLES,sizeof (double),double_compare);

	return compute_average_frequency (time_results);
}
#endif

/*
 * Estimate the cost of one profile call in time stamp counter ticks.
 *
 * compute_profile_overhead is run N_PROFILE_SAMPLES times; every run yields
 * the time of a loop with profile calls and the time of the same loop without
 * them. Both series are sorted, and the sums of the middle half of each
 * series (the entries from N_PROFILE_SAMPLES>>2 up to, but not including,
 * N_PROFILE_SAMPLES-(N_PROFILE_SAMPLES>>2)) are subtracted from each other.
 * The difference is divided by (N_PROFILE_SAMPLES>>1)*100000.0, where 100000
 * is the iteration count used by the Windows inline assembly version of
 * compute_profile_overhead.
 */
static double determine_profile_overhead (void)
{
	LARGE_INTEGER with_samples[N_PROFILE_SAMPLES],without_samples[N_PROFILE_SAMPLES];
	LARGE_INTEGER with_total,without_total;
	int i,first,last;

	i=0;
	while (i<N_PROFILE_SAMPLES){
		LARGE_INTEGER pair[2];

		compute_profile_overhead (pair);

		with_samples[i]=pair[0];
		without_samples[i]=pair[1];
		++i;
	}

	qsort (with_samples,N_PROFILE_SAMPLES,sizeof (LARGE_INTEGER),compare_large_integer);
	qsort (without_samples,N_PROFILE_SAMPLES,sizeof (LARGE_INTEGER),compare_large_integer);

	first=N_PROFILE_SAMPLES>>2;
	last=N_PROFILE_SAMPLES-first;

#ifdef LINUX
	with_total=0;
	without_total=0;

	for (i=first; i<last; ++i){
		with_total += with_samples[i];
		without_total += without_samples[i];
	}

	with_total -= without_total;

	return float_div
		((double)with_total,
		 (double)(N_PROFILE_SAMPLES>>1)*100000.0);
#else
	with_total.HighPart=0;
	with_total.LowPart=0;
	without_total.HighPart=0;
	without_total.LowPart=0;

	for (i=first; i<last; ++i){
		/* 64-bit additions in two halves: a low part sum that wraps around carries into the high part */
		with_total.HighPart += with_samples[i].HighPart;
		if (with_total.LowPart+with_samples[i].LowPart < with_total.LowPart)
			++with_total.HighPart;
		with_total.LowPart += with_samples[i].LowPart;

		without_total.HighPart += without_samples[i].HighPart;
		if (without_total.LowPart+without_samples[i].LowPart < without_total.LowPart)
			++without_total.HighPart;
		without_total.LowPart += without_samples[i].LowPart;
	}

	/* 64-bit subtraction in two halves, with a borrow from the high part */
	with_total.HighPart -= without_total.HighPart;
	if (with_total.LowPart < without_total.LowPart)
		--with_total.HighPart;
	with_total.LowPart -= without_total.LowPart;

	return float_div
		((double)with_total.LowPart+(65536.0*65536.0)*(double) with_total.HighPart,
		 (double)(N_PROFILE_SAMPLES>>1)*100000.0);
#endif
}

#ifndef LINUX
/*
 * Give the thread the priority new_priority, unless old_priority shows that
 * the earlier query of the thread priority failed
 * (THREAD_PRIORITY_ERROR_RETURN); in that case nothing is changed.
 */
static void set_thread_priority (HANDLE thread_handle,int old_priority,int new_priority)
{
	if (old_priority==THREAD_PRIORITY_ERROR_RETURN)
		return;

	SetThreadPriority (thread_handle,new_priority);
}
#endif

/*
 * Reinitialise the x87 floating point unit and push seven zeros on its
 * register stack. With LINUX defined this function does nothing.
 * The call sites carry the remark "for virtualpc".
 * NOTE(review): presumably the code that runs after the measurement expects
 * the register stack in this state - confirm.
 */
static void init_fpu (void)
{
#ifndef LINUX
	_asm {
	/* reset the FPU, then load 0.0 seven times */
	finit
	fldz
	fldz
	fldz
	fldz
	fldz
	fldz
	fldz
	}
#endif
}

/*
 * Determine the processor clock speed and the overhead of a profile call.
 *
 *   clock_speed_p       receives the clock speed: the number read by
 *                       cpu_clock when /proc/cpuinfo is used, otherwise the
 *                       measured time stamp counter rate divided by 1.0e6
 *   profile_overhead_p  receives the result of determine_profile_overhead
 *
 * Returns 0 on success. Returns 1 when the processor has no time stamp
 * counter and 2 when the reference frequency or the clock speed cannot be
 * determined; in both cases 0.0 is stored through both pointers.
 *
 * The scheduling priority is changed for the duration of the measurement and
 * set back to the saved value on every path that changed it.
 */
int measure_clock_speed_and_profile_overhead (double *clock_speed_p,double *profile_overhead_p)
{
	LARGE_INTEGER performance_frequency;
	double frequency,average_frequency;
#ifndef LINUX
	HANDLE thread_handle;
#endif
	int priority;

	if (!has_time_stamp_counter()){
		*clock_speed_p=0.0;
		*profile_overhead_p=0.0;
#ifndef LINUX
		init_fpu(); /* for virtualpc */
#endif
		return 1;
	}

#ifdef LINUX
	priority=getpriority (PRIO_PROCESS,0);
	/* NOTE(review): larger nice values mean a lower scheduling priority and
	   the value is clamped to the allowed range, whereas the Windows branch
	   raises the priority - confirm that 20 is intended here */
	setpriority (PRIO_PROCESS,0,20);
#else
	thread_handle=GetCurrentThread();
	priority=GetThreadPriority (thread_handle);
	set_thread_priority (thread_handle,priority,THREAD_PRIORITY_TIME_CRITICAL);
#endif

#ifdef LINUX
	performance_frequency=0;

	/* the Linux reference timer counts microseconds */
	frequency = (double)1000000;
#else
	performance_frequency.LowPart=0;
	performance_frequency.HighPart=0;

	if (!QueryPerformanceFrequency (&performance_frequency)){
		set_thread_priority (thread_handle,priority,priority);
		*clock_speed_p=0.0;
		*profile_overhead_p=0.0;
		init_fpu(); /* for virtualpc */
		return 2;
	}

	frequency = (double)performance_frequency.LowPart +
				(double)performance_frequency.HighPart * (65536.0*65536.0);
#endif

#if defined (LINUX) && defined (USE_PROC_CPUINFO_ON_LINUX)
	average_frequency = cpu_clock();
	if (! (average_frequency>0.0)){
		/* put the saved priority back before giving up, as on the other exits */
		setpriority (PRIO_PROCESS,0,priority);
		*clock_speed_p=0.0;
		*profile_overhead_p=0.0;
		return 2;
	}
	*clock_speed_p = average_frequency;
#else
	average_frequency = determine_cpu_clock_speed (frequency);

	*clock_speed_p = float_div (average_frequency,1.0e6);
#endif
	*profile_overhead_p = determine_profile_overhead();

#ifdef LINUX
	setpriority (PRIO_PROCESS,0,priority);
#else
	set_thread_priority (thread_handle,priority,priority);
#endif

	init_fpu(); /* for virtualpc */

	return 0;
}

#if 0
/*
 * Test driver: run the measurement, print the status, the clock speed and
 * the profile overhead on one line, and use the status as exit code.
 */
int main (void)
{
	double speed,overhead;
	int status;

	status=measure_clock_speed_and_profile_overhead (&speed,&overhead);
	printf ("%d %g %g\n",status,speed,overhead);

	return status;
}
#endif

