Hi,
I have a question about floating point precision in C.
What is the minimum distinguishable difference between 2 floating point
numbers? Does this differ for various computers?
Is this the EPSILON? I know in float.h a FLT_EPSILON is defined to be
10^-5. Does this mean that the computer cannot distinguish between 2
numbers that differ by less than this epsilon?
A problem I am seeing is a difference in values from a floating point
computation for a run on a Windows machine compared to a run on a Linux
machine. The values differ by 10^-6.
Thanks for any help,
Michael
This is a long post and I know there will be "comments".
People have lots of issues with the way Microsoft handles floating point
numbers on Windows systems. IMHO it sucks.
It seems that hacks left in from Intel's Pentium FPU problems may account
for some of the weirdness.
So now for the long part of this post.
Here is a program that attempts to find the number of "real" bits in the
floating point support by using only standard C functionality.
Well you know that's a lie about "standard C" whenever Microsoft and Windows
are involved.
/*
file: flt_precision.c
Find number of significant bits in the floating point fraction.
Sample output for Microsoft VC6:
float size 4
double size 8
long double size 8
Max delta for float 16777215, bits 24
Max delta for double 9007199254740991, bits 53
Max delta for long double 9007199254740991, bits 53
Sample output for gcc 2.95.3:
float size 4
double size 8
long double size 12
Max delta for float 16777215, bits 24
Max delta for double 9007199254740991, bits 53
Max delta for long double 18446744073709551615, bits 64
Notes:
The EPSILON value for each floating point data type should be
defined in your float.h standard header.
You should check to make sure that your implementation
matches your library.
The Microsoft compiler for Windows does not support
long double at greater resolution than double.
*/
#include <stdio.h>
#include <float.h>
/*
 * Print one result line for the float type:
 *   "Max delta for float <value>, bits <bits>"
 * The value is printed with no decimal places.
 *
 * f    - points to the value to print; it is only read
 * bits - bit count to report alongside the value
 */
static void printFloat(float * f, int bits)
{
    /* A float passed through "..." is promoted to double anyway;
       doing it here just makes the promotion visible. */
    const double maxDelta = *f;

    printf("Max delta for float %.0f, bits %d\n", maxDelta, bits);
}
/*
 * Print one result line for the double type:
 *   "Max delta for double <value>, bits <bits>"
 * The value is printed with no decimal places ("%.f" means precision 0).
 *
 * d    - points to the value to print; it is only read
 * bits - bit count to report alongside the value
 */
static void printDouble(double * d, int bits)
{
    const double maxDelta = *d;

    printf("Max delta for double %.f, bits %d\n", maxDelta, bits);
}
/*
This code will take some explaining.
1) printf does not deal well with long floats accurately.
The solution to number one is to store the
floating point fraction as an unsigned integer.
2) Microsoft does not support long long data types.
The solution to number two is to use a Microsoft
non-portable data type.
*/
/*
 * Print one result line for the long double type:
 *   "Max delta for long double <value>, bits <bits>"
 *
 * The value is converted to a 64-bit unsigned integer and printed as an
 * integer. Values that cannot be converted safely (negative, NaN, or
 * 2^64 and above) are printed with "%.0Lf" instead, because converting
 * an out-of-range floating value to an integer type is undefined.
 *
 * Define MICROSOFT_STUPID_C when building with a Microsoft compiler that
 * lacks long long; the non-portable _int64 type and "%I64u" format are
 * used in that case.
 *
 * ld   - points to the value to print; it is only read
 * bits - bit count to report alongside the value
 */
static void printLongDouble(long double * ld, int bits)
{
    /* 2^64: the first value that no longer fits in 64 unsigned bits. */
    const long double limit = 18446744073709551616.0L;

    /* Written as a negated "in range" test so that NaN also lands here. */
    if (!(*ld >= 0.0L && *ld < limit))
    {
        printf("Max delta for long double %.0Lf, bits %d\n", *ld, bits);
        return;
    }

#ifdef MICROSOFT_STUPID_C
    {
        unsigned _int64 delta;
        delta = (unsigned _int64)(*ld);
        printf("Max delta for long double %I64u, bits %d\n", delta, bits);
    }
#else
    {
        /* Must be unsigned: it is printed with %llu and may use all 64 bits. */
        unsigned long long delta;
        delta = (unsigned long long)(*ld);
        printf("Max delta for long double %llu, bits %d\n", delta, bits);
    }
#endif
}
/*
 * Report the size of float, double and long double, then measure how
 * many significand bits each type carries.
 *
 * For each type, a delta starting at 1.0 is halved until adding it to 1.0
 * no longer changes the stored sum; the number of halvings is the bit
 * count. The closing expression then rebuilds 2^bits - 1 (the value with
 * every significand bit set) for printing.
 *
 * argc, argv - unused
 * Returns 0.
 */
int main(int argc, char* argv[])
{
    float f, f2, fd2;
    double d, d2, dd2;
    long double ld, ld2, ldd2;
    /* The sums are volatile so each one is stored at its declared
       precision before being compared; otherwise a compiler that keeps
       intermediates in wider registers could report too many bits. */
    volatile float fp1;
    volatile double dp1;
    volatile long double ldp1;
    int bits;

    (void)argc;
    (void)argv;

    /* sizeof yields size_t, which does not match %d; cast to a type with
       a format specifier that pre-C99 libraries also understand. */
    printf("float size %lu\n", (unsigned long)sizeof(f));
    printf("double size %lu\n", (unsigned long)sizeof(d));
    printf("long double size %lu\n", (unsigned long)sizeof(ld));

    /* float */
    f = 1.0f;
    f2 = 2.0f;
    fp1 = 1.0f;
    fd2 = 1.0f;
    bits = 0;
    do
    {
        bits++;
        fd2 = fd2 / f2;
        fp1 = f + fd2;
    } while (f != fp1);
    /* fd2 is now 2^-bits, so this evaluates to 2^bits - 1. */
    f = ((f / (fd2 * f2) - f) * f2) + f;
    printFloat(&f, bits);

    /* double */
    d = 1.0;
    d2 = 2.0;
    dp1 = 1.0;
    dd2 = 1.0;
    bits = 0;
    do
    {
        bits++;
        dd2 = dd2 / d2;
        dp1 = d + dd2;
    } while (d != dp1);
    d = ((d / (dd2 * d2) - d) * d2) + d;
    printDouble(&d, bits);

    /* long double */
    ld = 1.0L;
    ld2 = 2.0L;
    ldp1 = 1.0L;
    ldd2 = 1.0L;
    bits = 0;
    do
    {
        bits++;
        ldd2 = ldd2 / ld2;
        ldp1 = ld + ldd2;
    } while (ld != ldp1);
    ld = ((ld / (ldd2 * ld2) - ld) * ld2) + ld;
    printLongDouble(&ld, bits);

    return 0;
}