P
Philipp Kayser
Hi,
I wrote a small program to test scalability in a multiprocessor
environment (in my case an Athlon 64 X2). I included the source below.
To my surprise the calculation does not run faster if I use 2 threads
(which should be the case if I have 2 processors) but it runs 5 times
slower (e.g. 15s instead of 3s)! Everything else seems to be okay: If i
use 1 thread, I have a total CPU usage of 50%, if use two threads I get
100%.
The best thing is: if I limit the JVM to one processor by setting the
CPU affinity for the process, but again take 2 threads, the calculation
runs only 2 times slower (6s instead of 3s for one thread).
My current current diagnosis is that it may have something to do with
the CPU cache. I searched the Internet for similar problems and found
the terms "CPU Cache trashing" and "Ping-Pong-Effect": the processors
always switch between the two threads and by doing so their CPU cache
gets flushed every time.
Does anyone have an idea?
Best regards,
Philipp Kayser.
public class Test
{
private final int number_of_threads = 2;
private static int number_of_finished_threads;
private Thread threads[] = new Thread[number_of_threads];
double result[] = new double[number_of_threads];
private Thread main_thread;
private class CalculationThread extends Thread
{
int thread_number;
CalculationThread(int n)
{
super();
thread_number = n;
}
public void run()
{
try
{
synchronized (this)
{
while (true)
{
wait();
int n = 0;
for (n = 0; n < 600000000; n++)
if (n % number_of_threads == thread_number)
result[thread_number] += Math.sqrt(n);
synchronized (main_thread)
{
number_of_finished_threads++;
main_thread.notify();
}
}
}
}
catch (InterruptedException e)
{
}
}
}
private void multithreaded_calculation()
{
synchronized (main_thread)
{
number_of_finished_threads = 0;
int i;
for (i = 0; i < number_of_threads; i++)
{
synchronized (threads)
{
threads.notify();
}
}
do
{
try
{
main_thread.wait();
}
catch (InterruptedException e)
{
}
}
while (number_of_finished_threads < number_of_threads);
double total_result = 0;
for (i = 0; i < number_of_threads; i++)
total_result += result;
System.out.println(total_result);
}
}
private void test()
{
main_thread = Thread.currentThread();
int i;
for (i = 0; i < number_of_threads; i++)
{
threads = new CalculationThread(i);
threads.setPriority(Thread.NORM_PRIORITY);
threads.setDaemon(true);
threads.start();
}
try
{
Thread.sleep(1000);
}
catch (InterruptedException e)
{
}
long t0 = new Date().getTime();
multithreaded_calculation();
long t1 = new Date().getTime();
System.out.println(((double)t1 - t0)/1000);
}
public static void main(String[] args) {
new Test().test();
}
}
I wrote a small program to test scalability in a multiprocessor
environment (in my case an Athlon 64 X2). I included the source below.
To my surprise the calculation does not run faster if I use 2 threads
(which should be the case if I have 2 processors) but it runs 5 times
slower (e.g. 15s instead of 3s)! Everything else seems to be okay: If i
use 1 thread, I have a total CPU usage of 50%, if use two threads I get
100%.
The best thing is: if I limit the JVM to one processor by setting the
CPU affinity for the process, but again take 2 threads, the calculation
runs only 2 times slower (6s instead of 3s for one thread).
My current current diagnosis is that it may have something to do with
the CPU cache. I searched the Internet for similar problems and found
the terms "CPU Cache trashing" and "Ping-Pong-Effect": the processors
always switch between the two threads and by doing so their CPU cache
gets flushed every time.
Does anyone have an idea?
Best regards,
Philipp Kayser.
public class Test
{
private final int number_of_threads = 2;
private static int number_of_finished_threads;
private Thread threads[] = new Thread[number_of_threads];
double result[] = new double[number_of_threads];
private Thread main_thread;
private class CalculationThread extends Thread
{
int thread_number;
CalculationThread(int n)
{
super();
thread_number = n;
}
public void run()
{
try
{
synchronized (this)
{
while (true)
{
wait();
int n = 0;
for (n = 0; n < 600000000; n++)
if (n % number_of_threads == thread_number)
result[thread_number] += Math.sqrt(n);
synchronized (main_thread)
{
number_of_finished_threads++;
main_thread.notify();
}
}
}
}
catch (InterruptedException e)
{
}
}
}
private void multithreaded_calculation()
{
synchronized (main_thread)
{
number_of_finished_threads = 0;
int i;
for (i = 0; i < number_of_threads; i++)
{
synchronized (threads)
{
threads.notify();
}
}
do
{
try
{
main_thread.wait();
}
catch (InterruptedException e)
{
}
}
while (number_of_finished_threads < number_of_threads);
double total_result = 0;
for (i = 0; i < number_of_threads; i++)
total_result += result;
System.out.println(total_result);
}
}
private void test()
{
main_thread = Thread.currentThread();
int i;
for (i = 0; i < number_of_threads; i++)
{
threads = new CalculationThread(i);
threads.setPriority(Thread.NORM_PRIORITY);
threads.setDaemon(true);
threads.start();
}
try
{
Thread.sleep(1000);
}
catch (InterruptedException e)
{
}
long t0 = new Date().getTime();
multithreaded_calculation();
long t1 = new Date().getTime();
System.out.println(((double)t1 - t0)/1000);
}
public static void main(String[] args) {
new Test().test();
}
}