"io_x" <
[email protected]> ha scritto nel messaggio
what about
/*
 * Median of three values.
 *
 * Orders a and c into (lo, hi) first, then clamps b into that range:
 * the clamped value is the median.
 */
e_type medianr(e_type a, e_type b, e_type c)
{
    e_type lo = a;
    e_type hi = c;

    if (lo > hi) {          /* make sure lo <= hi */
        lo = c;
        hi = a;
    }
    if (b < lo)             /* b is the smallest: lo sits in the middle */
        return lo;
    if (b > hi)             /* b is the largest: hi sits in the middle */
        return hi;
    return b;               /* lo <= b <= hi */
}
;-----------------------------------------------------------------------
; e_type medianrAsm(P1, P2, P3)
; Median of three signed 32-bit values (three conditional exchanges).
; Stack on entry: [esp+0] return address, [esp+4] P1, [esp+8] P2,
;                 [esp+12] P3
; Out:   eax = median
; Uses:  ecx, edx, flags
;-----------------------------------------------------------------------
_medianrAsm:
        mov     eax, dword [esp + 8]    ; eax = P2, the running median candidate
        mov     ecx, dword [esp + 12]   ; ecx = P3
        mov     edx, dword [esp + 4]    ; edx = P1
        cmp     ecx, edx
        jge     .ends_ordered           ; P1 <= P3 already
        xchg    ecx, edx                ; now edx <= ecx
.ends_ordered:
        cmp     eax, edx
        jge     .above_low              ; candidate >= low end
        xchg    edx, eax                ; candidate was below low: take low
.above_low:
        cmp     eax, ecx
        jle     .done                   ; candidate <= high end
        xchg    ecx, eax                ; candidate was above high: take high
.done:
        ret
-------------------------
with
-------
section _DATA use32 public class=DATA
global _medianrAsm
section _TEXT use32 public class=CODE
;-----------------------------------------------------------------------
; e_type medianrAsm(a, r, c)
; Median of three signed 32-bit values, returning as soon as the
; answer is known.
; Stack on entry: [esp+0] return address, [esp+4] a, [esp+8] r,
;                 [esp+12] c
; Out:   eax = median
; Uses:  ecx, edx, flags
;-----------------------------------------------------------------------
align 8
_medianrAsm:
        mov     ecx, dword [esp + 12]   ; ecx = c
        mov     eax, dword [esp + 4]    ; eax = a
        mov     edx, dword [esp + 8]    ; edx = r
        cmp     ecx, eax
        jge     .ends_ordered           ; a <= c already
        xchg    ecx, eax                ; swap so that eax <= ecx
.ends_ordered:                          ; invariant: eax = low, ecx = high
        cmp     eax, edx
        jge     .done                   ; r <= low <= high: low is the median
        cmp     edx, ecx
        jge     .take_high              ; low <= high <= r: high is the median
        mov     eax, edx                ; low < r < high: r is the median
.done:
        ret
.take_high:
        mov     eax, ecx
        ret
---
Prova median3
m=843962432 Result: 29.000000
--------------
Prova median3a
m=843962432 Result: 28.000000
--------------
Prova median3b
m=843962432 Result: 43.000000
--------------
Prova medianr
m=843962432 Result: 29.000000
--------------
Prova medianrAsm
m=843962432 Result: 28.000000
--------------
They all run at about the same speed, except median3b, which seems slower.
------------------------
#define P printf
/*
Surprising to me, the switch ran 4x slower than the simple if() tests.
It appears that the missed branch predictions are killer.
I guess that profile guided optimization will help your routine
quite a bit if the data has some particular pattern.
*/
typedef int e_type;

/*
 * Median of three values, decided with at most three comparisons.
 * Splits first on whether a < b, then places c relative to the pair.
 */
e_type median3(e_type a, e_type b, e_type c)
{
    if (a < b) {
        if (b < c)
            return b;               /* a < b < c */
        return (a < c) ? c : a;     /* c lies below b: pick the larger of a, c */
    }
    if (a < c)
        return a;                   /* b <= a < c */
    return (b < c) ? c : b;         /* c lies at or below a: pick the larger of b, c */
}
e_type medianrAsm(e_type a, e_type b, e_type c);
/*
 * Median of three values.
 *
 * a and c are ordered into (lo, hi); the median is then lo when b is
 * below the range, hi when b is above it, and b otherwise.
 */
e_type medianr(e_type a, e_type b, e_type c)
{
    const e_type lo = (a > c) ? c : a;
    const e_type hi = (a > c) ? a : c;

    return (lo > b) ? lo
         : (hi < b) ? hi
         : b;
}
/*
 * Median of three values using plain comparisons only.
 * The first test fixes the order of a and b; the remaining two locate c.
 */
e_type median3a(e_type a, e_type b, e_type c)
{
    if (a < b)
        return (b < c) ? b          /* a b c */
             : (c < a) ? a          /* c a b */
             : c;                   /* a c b */

    return (a < c) ? a              /* b a c */
         : (c < b) ? b              /* c b a */
         : c;                       /* b c a */
}
/*
 * Median of three values via a 3-bit comparison code:
 *   bit 0 = (a > b), bit 1 = (b > c), bit 2 = (c > a).
 * Codes 0, 2 and 5 select a; codes 3 and 4 select b; every other code
 * selects c.
 */
e_type median3b(e_type a, e_type b, e_type c)
{
    const int code = (a > b) | ((b > c) << 1) | ((c > a) << 2);

    switch (code) {
    case 0:
    case 2:
    case 5:
        return a;
    case 3:
    case 4:
        return b;
    default:                        /* codes 1 and 6 (and any other value) */
        return c;
    }
}
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
/*
 * Benchmark one median implementation over a fixed sweep of inputs.
 *
 * Calls f(i ^ j, i, i ^ k) for i, j in [0, 1000) and k in [0, 3000),
 * folding every result into a checksum that is printed as "m=<value> "
 * so the calls cannot be optimised away and runs can be compared.
 *
 * f:       the median function to time.
 * returns: elapsed wall-clock time in seconds, as measured by time(),
 *          i.e. with whole-second resolution.
 *
 * The checksum is accumulated in an unsigned variable: the sum exceeds
 * the range of a signed int many times over, and signed overflow is
 * undefined behaviour, whereas unsigned arithmetic wraps.
 */
double prova(e_type (*f)(e_type, e_type, e_type))
{
    time_t   t_start, t_end;
    unsigned checksum = 0;
    e_type   i, j, k;

    t_start = time(0);
    for (i = 0; i < 1000; i++)
        for (j = 0; j < 1000; j++)
            for (k = 0; k < 3000; k++)
                checksum += (unsigned) f(i ^ j, i, i ^ k);
    t_end = time(0);

    P("m=%u ", checksum);
    return difftime(t_end, t_start);
}
/*
 * Benchmark one median implementation on pseudo-random inputs.
 *
 * Performs 300000000 calls of f on three consecutive rand() values and
 * folds the results into a checksum printed as "m=<value> ".
 *
 * f:       the median function to time.
 * returns: elapsed wall-clock time in seconds, as measured by time(),
 *          i.e. with whole-second resolution.
 *
 * The checksum is unsigned so that it wraps instead of overflowing a
 * signed int (undefined behaviour). The iteration counter is a long
 * because 300000000 is outside the range the standard guarantees for int.
 */
double prova1(e_type (*f)(e_type, e_type, e_type))
{
    time_t   t_start, t_end;
    unsigned checksum = 0;
    long     n;
    e_type   x, y, z;

    t_start = time(0);
    for (n = 0; n < 300000000L; ++n) {
        /* Separate statements keep the order of the rand() calls defined. */
        x = rand();
        y = rand();
        z = rand();
        checksum += (unsigned) f(x, y, z);
    }
    t_end = time(0);

    P("m=%u ", checksum);
    return difftime(t_end, t_start);
}
int main(void)
{ double h;
e_type i,
j,
k,
m,
m2,
m3;
for (i = 0; i < 300; i++)
for (j = 0; j < 300; j++)
for (k = 0; k < 300; k++) {
m = median3(i, j, k);
m2 = medianr(i, j, k);
m3 = medianrAsm(i, j, k);
if (m3 != m)
{printf("Disagreement of %d verses %d\n", m3, m);
printf("median of %d %d %d is %d\n", i, j, k, m3);
return 0;
}
if (m2 != m)
{printf("Disagreement of %d verses %d\n", m2, m);
printf("median of %d %d %d is %d\n", i, j, k, m2);
return 0;
}
}
P("Prova median3\n");
h=prova(median3);
P("Result: %f\n", h);
P("--------------\n");
P("Prova median3a\n");
h=prova(median3a);
P("Result: %f\n", h);
P("--------------\n");
P("Prova median3b\n");
h=prova(median3b);
P("Result: %f\n", h);
P("--------------\n");
P("Prova medianr\n");
h=prova(medianr);
P("Result: %f\n", h);
P("--------------\n");
P("Prova medianrAsm\n");
h=prova(medianrAsm);
P("Result: %f\n", h);
P("--------------\n");
return 0;
}
My result:
typedef int e_type;

/*
 * Reference median of three: at most three comparisons, no swaps.
 * The outer test orders a and b; each branch then places c.
 */
e_type
median3(e_type a, e_type b, e_type c)
{
    e_type med;

    if (a < b)
        med = (b < c) ? b : ((a < c) ? c : a);
    else
        med = (a < c) ? a : ((b < c) ? c : b);
    return med;
}
/*
 * Median of three with explicit if-tests and a single exit.
 * c is the answer unless one of the inner tests picks a or b.
 */
e_type
median3a(e_type a, e_type b, e_type c)
{
    e_type med = c;                 /* covers "a c b" and "b c a" */

    if (a < b) {
        if (b < c)
            med = b;                /* a b c */
        else if (c < a)
            med = a;                /* c a b */
    } else {
        if (a < c)
            med = a;                /* b a c */
        else if (c < b)
            med = b;                /* c b a */
    }
    return med;
}
/*
 * Median of three selected by a 3-bit comparison key:
 *   bit 0 = (a > b), bit 1 = (b > c), bit 2 = (c > a).
 * Keys 0, 2, 5 map to a; keys 3, 4 map to b; anything else maps to c.
 */
e_type
median3b(e_type a, e_type b, e_type c)
{
    const int key = (a > b) + ((b > c) << 1) + ((c > a) << 2);

    if (key == 0 || key == 2 || key == 5)
        return a;
    if (key == 3 || key == 4)
        return b;
    return c;                       /* keys 1 and 6 (and any other value) */
}
/*
 * Median of three by table lookup.
 * The three comparison results form an index 0..6 into a table of the
 * argument values themselves; bit 0 = (a > b), bit 1 = (b > c),
 * bit 2 = (c > a).
 */
e_type
median3c(e_type a, e_type b, e_type c)
{
    const int slot = (a > b) | ((b > c) << 1) | ((c > a) << 2);
    const e_type by_slot[7] = { a, c, a, b, b, a, c };

    return by_slot[slot];
}
/*
 * Median of three by table lookup through pointers.
 * Same index as the value-table variant (bit 0 = a > b, bit 1 = b > c,
 * bit 2 = c > a), but the table holds the addresses of the arguments
 * rather than copies of their values.
 */
e_type
median3d(e_type a, e_type b, e_type c)
{
    const int slot = (a > b) | ((b > c) << 1) | ((c > a) << 2);
    const e_type *const by_slot[7] = { &a, &c, &a, &b, &b, &a, &c };

    return *by_slot[slot];
}
/*
 * Median of three using a lookup table packed into the constant 0x2148.
 *
 * The comparison results give a shift count of twice the 3-bit code
 * (bit 1 = a > b, bit 2 = b > c, bit 3 = c > a). Each 2-bit field of
 * 0x2148 holds the selector for that code: 0 picks a, 1 picks b,
 * anything else picks c.
 */
e_type
median3e(e_type a, e_type b, e_type c)
{
    const int shift = ((a > b) << 1) | ((b > c) << 2) | ((c > a) << 3);
    const unsigned pick = (0x2148U >> shift) & 3;

    if (pick == 0)
        return a;
    if (pick == 1)
        return b;
    return c;
}
/*
 * Branch-free median of three.
 *
 * A 2-bit selector is extracted from the packed table 0x2148 (shift =
 * twice the comparison code; bit 1 = a > b, bit 2 = b > c,
 * bit 3 = c > a). The result is formed arithmetically: exactly one of
 * the three equality tests is 1 for selectors 0, 1 and 2, so the sum
 * reduces to the chosen argument.
 */
e_type
median3f(e_type a, e_type b, e_type c)
{
    const int shift = ((a > b) << 1) + ((b > c) << 2) + ((c > a) << 3);
    const unsigned pick = (0x2148U >> shift) & 3;

    return (pick == 0) * a
         + (pick == 1) * b
         + (pick == 2) * c;
}
/*
 * Median of three using the packed table 0x2148 and bit tests.
 *
 * The selector is the 2-bit field at shift = twice the comparison code
 * (bit 1 = a > b, bit 2 = b > c, bit 3 = c > a). If its high bit is
 * set the answer is c; otherwise a non-zero selector means b and zero
 * means a.
 */
e_type
median3g(e_type a, e_type b, e_type c)
{
    const int shift = ((a > b) << 1) + ((b > c) << 2) + ((c > a) << 3);
    const unsigned pick = (0x2148U >> shift) & 3;

    if (pick & 2)
        return c;
    if (pick)
        return b;
    return a;
}
/*
 * Median of three by ordering the outer pair and clamping the middle.
 *
 * a and c are arranged as (lo, hi); b is then raised to lo if it is
 * smaller and lowered to hi if it is larger. The clamped value is the
 * median.
 */
e_type
median3h(e_type a, e_type b, e_type c)
{
    const e_type lo = (a > c) ? c : a;
    const e_type hi = (a > c) ? a : c;
    e_type mid = b;

    if (lo > mid)
        mid = lo;
    if (hi < mid)
        mid = hi;
    return mid;
}
#ifdef UNIT_TEST
#include <stdio.h>
#include <stdlib.h>

/*
 * Compare one variant's answer with the reference answer.
 * Prints a "Disagreement (<variant>) ..." line and returns 1 when they
 * differ; returns 0 when they agree.
 */
static int
disagrees(int variant, e_type got, e_type want)
{
    if (got == want)
        return 0;
    printf("Disagreement (%d) of %d verses %d\n", variant, got, want);
    return 1;
}

/*
 * Exhaustive cross-check of every median variant against median3 for
 * all triples in [0, 3000)^3.
 *
 * Every disagreement is printed. The exit status is EXIT_FAILURE if
 * any variant ever disagreed and 0 otherwise, so that scripts running
 * the test can detect a failure.
 * With SHOW_RESULTS defined, the median3b result is printed for every
 * triple.
 */
int main(void)
{
    e_type i, j, k;
    e_type want;
    e_type got[10];                 /* got[v] = answer of variant v, v = 2..9 */
    int    v;
    int    failed = 0;

    for (i = 0; i < 3000; i++)
        for (j = 0; j < 3000; j++)
            for (k = 0; k < 3000; k++) {
                want   = median3(i, j, k);
                got[2] = median3a(i, j, k);
                got[3] = median3b(i, j, k);
                got[4] = median3c(i, j, k);
                got[5] = median3d(i, j, k);
                got[6] = median3e(i, j, k);
                got[7] = median3f(i, j, k);
                got[8] = median3g(i, j, k);
                got[9] = median3h(i, j, k);
#ifdef SHOW_RESULTS
                printf("median of %d %d %d is %d\n", i, j, k, got[3]);
#endif
                /* Report from variant 9 down to 2. */
                for (v = 9; v >= 2; v--)
                    failed |= disagrees(v, got[v], want);
            }
    return failed ? EXIT_FAILURE : 0;
}
#endif
/*
Profile results were:
Function Name,Exclusive Samples,Exclusive Samples %,
"median3f","73,811",18.50,
"main","58,858",14.75,
"median3d","58,253",14.60,
"median3g","49,352",12.37,
"median3c","44,832",11.23,
"median3b","42,708",10.70,
"median3e","33,287",8.34,
"median3h","16,757",4.20,
"median3a","10,822",2.71,
"median3","10,380",2.60,
*/