S
Stefan Ram
HotSpot is given bytecode, which is as good as source code.
HotSpot then translates this into the machine language of the
hardware processor.
Therefore, a code segment that neither allocates nor releases
memory and does not call other methods, but only performs double
and int arithmetic, loops, and branches, should be compiled to
machine code similar to that produced by a C++ compiler.
Thus, it should be approximately as fast as the C++ code (or
even faster, because HotSpot knows more about the runtime
environment than the C++ compiler does).
So why is the Java program in the following benchmark still
slower than the C++ program?
(Most of the runtime is spent in the first function / method,
which neither calls other functions / methods nor performs any
memory allocations / deallocations.)
#include <stdio.h>
#include "windows.h"
/* Escape threshold: compared against the sum of the squared real and
 * imaginary parts inside mandelbrot(). */
#define BAILOUT 16
/* Iteration cap: once the counter exceeds this value, mandelbrot()
 * gives up and returns 0. */
#define MAX_ITERATIONS 1000

/*
 * Runs the iteration z -> z*z + c starting from z = 0, where the real
 * part of c is (y - 0.5) and the imaginary part of c is x.
 *
 * Returns the 1-based number of the iteration in which the squared
 * magnitude of z (taken before that iteration's update) is found to
 * exceed BAILOUT, or 0 if that has not happened by the time the
 * iteration counter exceeds MAX_ITERATIONS.
 */
int mandelbrot( double const x, double const y )
{
    double const re_c = y - 0.5;
    double re = 0.0;
    double im = 0.0;

    for( int n = 1; ; ++n )
    {
        /* All three products are taken from the not-yet-updated z. */
        double const cross = re * im;
        double const re_sq = re * re;
        double const im_sq = im * im;

        re = re_sq - im_sq + re_c;
        im = cross + cross + x;

        /* The escape test comes first, so an escape detected in the
         * very last permitted iteration still reports its number. */
        if( im_sq + re_sq > BAILOUT )
        {
            return n;
        }
        if( n > MAX_ITERATIONS )
        {
            return 0;
        }
    }
}
/*
 * One benchmark pass: evaluates mandelbrot() on a 78 x 78 grid
 * (both coordinates run from -39 to 38 and are divided by 40.0) and
 * stores the resulting picture as text in a heap buffer: ' ' where
 * mandelbrot() returned non-zero, '*' where it returned 0, with a
 * '\n' in front of every row and a terminating 0 at the end.
 *
 * The buffer is released again without being printed; if the
 * allocation fails the pass silently does nothing.
 */
void main1()
{
    char * const picture =( char * )malloc( 82 * 82 + 82 );
    if( !picture )
    {
        return;
    }

    char * out = picture;
    for( int row = -39; row < 39; ++row )
    {
        *out++ = '\n';
        for( int col = -39; col < 39; ++col )
        {
            if( mandelbrot( col / 40.0, row / 40.0 ))
            {
                *out++ = ' ';
            }
            else
            {
                *out++ = '*';
            }
        }
    }
    *out = 0;

    /* Output is disabled while timing; to see the picture use:
     *     puts( picture );
     */
    free(( void * )picture );
}
int main()
{ for( int i = 0; i < 20; ++i )
{ long const start =( long )GetTickCount();
main1();
printf( "%ld ms\n",( long )GetTickCount() - start ); }
system( "PAUSE" ); }
154 ms
149 ms
148 ms
149 ms
147 ms
147 ms
147 ms
146 ms
146 ms
146 ms
145 ms
150 ms
145 ms
145 ms
146 ms
145 ms
150 ms
148 ms
149 ms
148 ms
/**
 * Benchmark that repeatedly computes a small text picture with an
 * escape-time iteration and prints how long each pass took.
 */
public class Main
{
    /** Escape threshold compared against the squared magnitude of z. */
    final static int BAILOUT = 16;

    /** Iteration cap; once the counter exceeds it, iterate() returns 0. */
    final static int MAX_ITERATIONS = 1000;

    /**
     * Runs the iteration z -> z*z + c starting from z = 0, where the
     * real part of c is (y - 0.5) and the imaginary part of c is x.
     *
     * @param x value added to the imaginary part in every step
     * @param y value which, reduced by 0.5, is added to the real part
     *          in every step
     * @return the 1-based number of the iteration in which the squared
     *         magnitude of z (taken before that iteration's update) is
     *         found to exceed BAILOUT, or 0 if that has not happened by
     *         the time the counter exceeds MAX_ITERATIONS
     */
    public static int iterate( final double x, final double y )
    {
        final double realOffset = y - 0.5;
        double re = 0.0;
        double im = 0.0;

        for( int n = 1; ; ++n )
        {
            // All three products are taken from the not-yet-updated z.
            final double cross = re * im;
            final double reSq = re * re;
            final double imSq = im * im;

            re = reSq - imSq + realOffset;
            im = cross + cross + x;

            // Escape test first, so an escape seen in the very last
            // permitted iteration still reports its number.
            if( imSq + reSq > BAILOUT )
            {
                return n;
            }
            if( n > MAX_ITERATIONS )
            {
                return 0;
            }
        }
    }

    /**
     * One benchmark pass: evaluates iterate() on a 78 x 78 grid (both
     * coordinates run from -39 to 38 and are divided by 40.0) and
     * stores the picture in a char array: '*' where iterate() returned
     * 0, ' ' otherwise, with a '\n' in front of every row. The array
     * is not printed.
     */
    public static void main1()
    {
        final char[] picture = new char[ 82 * 82 + 82 ];
        int next = 0;

        for( int row = -39; row < 39; ++row )
        {
            picture[ next++ ] = '\n';
            for( int col = -39; col < 39; ++col )
            {
                final boolean escaped = iterate( col / 40.0, row / 40.0 ) != 0;
                picture[ next++ ] = escaped ? ' ' : '*';
            }
        }

        /* Output is disabled while timing; to see the picture use:
           java.lang.System.out.println( new java.lang.String( picture )); */
    }

    /**
     * Runs main1() twenty times and prints the elapsed time of each
     * run in whole milliseconds.
     *
     * @param args ignored
     */
    public static void main( final java.lang.String[] args )
    {
        int run = 0;
        while( run < 20 )
        {
            final long before = System.nanoTime();
            main1();
            final long elapsedNanos = System.nanoTime() - before;
            final long elapsedMillis = elapsedNanos / 1000 / 1000;
            java.lang.System.out.println( elapsedMillis + " ms" );
            ++run;
        }
    }
}
358 ms
220 ms
279 ms
214 ms
215 ms
214 ms
215 ms
214 ms
215 ms
215 ms
215 ms
214 ms
215 ms
214 ms
215 ms
214 ms
236 ms
214 ms
216 ms
214 ms
C++ 6,75 main1/s
********************************************************************
Java 4,67 main1/s
***********************************************
Based on code from
http://www.timestretch.com/FractalBenchmark.html
HotSpot then translates this into the machine language of the
hardware processor.
Therefore, a code segment that neither allocates nor releases
memory and does not call other methods, but only performs double
and int arithmetic, loops, and branches, should be compiled to
machine code similar to that produced by a C++ compiler.
Thus, it should be approximately as fast as the C++ code (or
even faster, because HotSpot knows more about the runtime
environment than the C++ compiler does).
So why is the Java program in the following benchmark still
slower than the C++ program?
(Most of the runtime is spent in the first function / method,
which neither calls other functions / methods nor performs any
memory allocations / deallocations.)
#include <stdio.h>
#include "windows.h"
/* Escape threshold: compared against the sum of the squared real and
 * imaginary parts inside mandelbrot(). */
#define BAILOUT 16
/* Iteration cap: once the counter exceeds this value, mandelbrot()
 * gives up and returns 0. */
#define MAX_ITERATIONS 1000

/*
 * Runs the iteration z -> z*z + c starting from z = 0, where the real
 * part of c is (y - 0.5) and the imaginary part of c is x.
 *
 * Returns the 1-based number of the iteration in which the squared
 * magnitude of z (taken before that iteration's update) is found to
 * exceed BAILOUT, or 0 if that has not happened by the time the
 * iteration counter exceeds MAX_ITERATIONS.
 */
int mandelbrot( double const x, double const y )
{
    double const re_c = y - 0.5;
    double re = 0.0;
    double im = 0.0;

    for( int n = 1; ; ++n )
    {
        /* All three products are taken from the not-yet-updated z. */
        double const cross = re * im;
        double const re_sq = re * re;
        double const im_sq = im * im;

        re = re_sq - im_sq + re_c;
        im = cross + cross + x;

        /* The escape test comes first, so an escape detected in the
         * very last permitted iteration still reports its number. */
        if( im_sq + re_sq > BAILOUT )
        {
            return n;
        }
        if( n > MAX_ITERATIONS )
        {
            return 0;
        }
    }
}
/*
 * One benchmark pass: evaluates mandelbrot() on a 78 x 78 grid
 * (both coordinates run from -39 to 38 and are divided by 40.0) and
 * stores the resulting picture as text in a heap buffer: ' ' where
 * mandelbrot() returned non-zero, '*' where it returned 0, with a
 * '\n' in front of every row and a terminating 0 at the end.
 *
 * The buffer is released again without being printed; if the
 * allocation fails the pass silently does nothing.
 */
void main1()
{
    char * const picture =( char * )malloc( 82 * 82 + 82 );
    if( !picture )
    {
        return;
    }

    char * out = picture;
    for( int row = -39; row < 39; ++row )
    {
        *out++ = '\n';
        for( int col = -39; col < 39; ++col )
        {
            if( mandelbrot( col / 40.0, row / 40.0 ))
            {
                *out++ = ' ';
            }
            else
            {
                *out++ = '*';
            }
        }
    }
    *out = 0;

    /* Output is disabled while timing; to see the picture use:
     *     puts( picture );
     */
    free(( void * )picture );
}
int main()
{ for( int i = 0; i < 20; ++i )
{ long const start =( long )GetTickCount();
main1();
printf( "%ld ms\n",( long )GetTickCount() - start ); }
system( "PAUSE" ); }
154 ms
149 ms
148 ms
149 ms
147 ms
147 ms
147 ms
146 ms
146 ms
146 ms
145 ms
150 ms
145 ms
145 ms
146 ms
145 ms
150 ms
148 ms
149 ms
148 ms
/**
 * Benchmark that repeatedly computes a small text picture with an
 * escape-time iteration and prints how long each pass took.
 */
public class Main
{
    /** Escape threshold compared against the squared magnitude of z. */
    final static int BAILOUT = 16;

    /** Iteration cap; once the counter exceeds it, iterate() returns 0. */
    final static int MAX_ITERATIONS = 1000;

    /**
     * Runs the iteration z -> z*z + c starting from z = 0, where the
     * real part of c is (y - 0.5) and the imaginary part of c is x.
     *
     * @param x value added to the imaginary part in every step
     * @param y value which, reduced by 0.5, is added to the real part
     *          in every step
     * @return the 1-based number of the iteration in which the squared
     *         magnitude of z (taken before that iteration's update) is
     *         found to exceed BAILOUT, or 0 if that has not happened by
     *         the time the counter exceeds MAX_ITERATIONS
     */
    public static int iterate( final double x, final double y )
    {
        final double realOffset = y - 0.5;
        double re = 0.0;
        double im = 0.0;

        for( int n = 1; ; ++n )
        {
            // All three products are taken from the not-yet-updated z.
            final double cross = re * im;
            final double reSq = re * re;
            final double imSq = im * im;

            re = reSq - imSq + realOffset;
            im = cross + cross + x;

            // Escape test first, so an escape seen in the very last
            // permitted iteration still reports its number.
            if( imSq + reSq > BAILOUT )
            {
                return n;
            }
            if( n > MAX_ITERATIONS )
            {
                return 0;
            }
        }
    }

    /**
     * One benchmark pass: evaluates iterate() on a 78 x 78 grid (both
     * coordinates run from -39 to 38 and are divided by 40.0) and
     * stores the picture in a char array: '*' where iterate() returned
     * 0, ' ' otherwise, with a '\n' in front of every row. The array
     * is not printed.
     */
    public static void main1()
    {
        final char[] picture = new char[ 82 * 82 + 82 ];
        int next = 0;

        for( int row = -39; row < 39; ++row )
        {
            picture[ next++ ] = '\n';
            for( int col = -39; col < 39; ++col )
            {
                final boolean escaped = iterate( col / 40.0, row / 40.0 ) != 0;
                picture[ next++ ] = escaped ? ' ' : '*';
            }
        }

        /* Output is disabled while timing; to see the picture use:
           java.lang.System.out.println( new java.lang.String( picture )); */
    }

    /**
     * Runs main1() twenty times and prints the elapsed time of each
     * run in whole milliseconds.
     *
     * @param args ignored
     */
    public static void main( final java.lang.String[] args )
    {
        int run = 0;
        while( run < 20 )
        {
            final long before = System.nanoTime();
            main1();
            final long elapsedNanos = System.nanoTime() - before;
            final long elapsedMillis = elapsedNanos / 1000 / 1000;
            java.lang.System.out.println( elapsedMillis + " ms" );
            ++run;
        }
    }
}
358 ms
220 ms
279 ms
214 ms
215 ms
214 ms
215 ms
214 ms
215 ms
215 ms
215 ms
214 ms
215 ms
214 ms
215 ms
214 ms
236 ms
214 ms
216 ms
214 ms
C++ 6,75 main1/s
********************************************************************
Java 4,67 main1/s
***********************************************
Based on code from
http://www.timestretch.com/FractalBenchmark.html