extending NArray

C

Cameron McBride

------=_Part_9477_9412626.1139692302456
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

I'm trying to make the fast extensions to NArray, yet still preserve
the general nature of methods (i.e. works on all NArray types). As an
example, I'll show benchmarks of a simple weighted mean:

Benchmarking based:
num_runs = 1000
data_size = 100000
user system total real
NArray_rb 3.150000 0.660000 3.810000 ( 3.803652)
NArray_mod 3.930000 0.020000 3.950000 ( 3.949027)
NArray_dbl 1.040000 0.000000 1.040000 ( 1.039897)
GSL 1.670000 0.000000 1.670000 ( 1.665538)

(sorry, Gmail messed with the formatting on paste)

I did the quick NArray_dbl hack, which is fast but explicitly casts to
'double', so it's not general. When I generalized it using some
internal SetFuncs of NArray, the result is slower than a ruby version
that does a double loop (two #sum calls).

What am I missing? As a success meter, I'd like it to be at least as
fast as GSL libs.

(benchmark and code attached. additionally uses inline and rb-gsl)

Thanks!

Cameron

------=_Part_9477_9412626.1139692302456
Content-Type: application/octet-stream; name=bench_wmean.rb
Content-Transfer-Encoding: 7bit
X-Attachment-Id: f_ejkf4aha
Content-Disposition: attachment; filename="bench_wmean.rb"

#!/usr/bin/env ruby

require 'benchmark'
include Benchmark

require 'gsl'
require 'na_wmean.rb'

# Number of times each implementation is invoked inside one benchmark report.
n = 1_000
# Number of elements in each test vector.
data_size = 100_000

# Data (x) and weights (w): NArray float vectors of data_size elements,
# filled in place by random!.
x = NArray.float(data_size).random!
w = NArray.float(data_size).random!

# GSL counterparts of x and w (via to_gv), used by the "GSL" benchmark entry.
gx = x.to_gv
gw = w.to_gv
# Reference weighted mean written with plain operators: the sum of the
# element-wise products xt * wt divided by the sum of the weights wt.
# Works with any objects that respond to * and sum the way NArray does.
def wmean_orig(xt, wt)
  weighted_total = (xt * wt).sum
  weight_total = wt.sum
  weighted_total / weight_total
end

# Report the run parameters, then time each weighted-mean implementation.
# Every report calls its implementation n times on the same vectors.
puts "Benchmarking based: ", " num_runs = #{n}", " data_size = #{data_size}"

# bmbm runs a rehearsal pass before the measured pass; 12 is the label width.
bmbm(12) do |bench|
  bench.report("NArray_rb") do
    n.times { wmean_orig(x, w) }
  end
  bench.report("NArray_mod") do
    n.times { x.wmean(w) }
  end
  bench.report("NArray_dbl") do
    n.times { x.wmean_dbl(w) }
  end
  bench.report("GSL") do
    n.times { gx.wmean(gw) }
  end
end






------=_Part_9477_9412626.1139692302456
Content-Type: application/octet-stream; name=na_wmean.rb
Content-Transfer-Encoding: 7bit
X-Attachment-Id: f_ejkf4ll0
Content-Disposition: attachment; filename="na_wmean.rb"

#!/usr/bin/env ruby

require 'rubygems'
require 'narray'
require 'inline'

# Reopens NArray to add weighted-mean methods compiled from C with RubyInline.
# Both methods are called as data.method(weights) and return a Ruby Float
# computed as sum(weights * data) / sum(weights).
class NArray

  inline do |builder|
    # Header search path for the NArray sources (machine-specific).
    builder.add_compile_flags %q(-I /export/home/cameron/sys/narray-0.5.8/)
    builder.include '"narray.h"'
    builder.include '"narray_local.h"' # few local things used in linspace

    # wmean_dbl(weights): fast path that reads both buffers directly as C
    # doubles. No type conversion is performed, so the result is only
    # meaningful when the receiver and the weights both store doubles.
    # Raises ArgumentError when no weights are given or the totals differ.
    builder.c_raw <<-'END_CODE'
      VALUE
      wmean_dbl(int argc, VALUE *argv, VALUE self) {

        int i;
        struct NARRAY *nv, *nw;
        double p_sum = 0.0, w_sum = 0.0;

        /* argv[0] is dereferenced below, so it has to exist */
        if(argc < 1)
          rb_raise( rb_eArgError, "weight vector argument is required" );

        GetNArray(self, nv);
        GetNArray(argv[0], nw);

        if(nv->total != nw->total)
          rb_raise( rb_eArgError, "Vector and weight must be same size!" );

        /* each buffer must be subscripted with [i]; the product of two
           bare pointers is not valid C */
        for(i=0 ; i < nv->total ; i++) {
          p_sum += ((double *)nw->ptr)[i] * ((double *)nv->ptr)[i];
          w_sum += ((double *)nw->ptr)[i];
        }

        return rb_float_new( p_sum / w_sum);
      }
    END_CODE

    # wmean(weights): type-generic variant. For each element it calls the
    # NArray-internal SetFuncs entry selected for (NA_DFLOAT, element type)
    # to obtain the value and the weight as doubles, then accumulates.
    # The per-element function call is what makes it slower than wmean_dbl.
    # Raises ArgumentError when no weights are given or the totals differ.
    builder.c_raw <<-'END_CODE'
      VALUE
      wmean(int argc, VALUE *argv, VALUE self) {

        int i,sv,sw;
        struct NARRAY *nv, *nw;
        double wt,val;
        double p_sum = 0.0, w_sum = 0.0;
        char *v,*w;
        void (*na_getv)();
        void (*na_getw)();

        /* argv[0] is dereferenced below, so it has to exist */
        if(argc < 1)
          rb_raise( rb_eArgError, "weight vector argument is required" );

        GetNArray(self, nv);
        GetNArray(argv[0], nw);

        if(nv->total != nw->total)
          rb_raise( rb_eArgError, "Vector and weight must be same size!" );

        /* conversion routines: destination NA_DFLOAT, source = array type */
        na_getv = SetFuncs[NA_DFLOAT][nv->type];
        na_getw = SetFuncs[NA_DFLOAT][nw->type];

        /* element sizes in bytes, used to step the raw byte pointers */
        sv = na_sizeof[nv->type];
        sw = na_sizeof[nw->type];

        v = nv->ptr;
        w = nw->ptr;

        for(i=0 ; i < nv->total ; i++) {
          /* store one converted element into val / wt */
          (*na_getv)( 1, &val, 0, v, 0 );
          (*na_getw)( 1, &wt, 0, w, 0 );

          v += sv;
          w += sw;

          p_sum += (wt) * (val);
          w_sum += (wt);
        }

        return rb_float_new( p_sum / w_sum);
      }
    END_CODE
  end

end






------=_Part_9477_9412626.1139692302456--
 
M

Masahiro TANAKA

|From: Cameron McBride
I'm trying to make the fast extensions to NArray, yet still preserve
the general nature of methods (i.e. works on all NArray types). As an
example, I'll show benchmarks of a simple weighted mean:

Benchmarking based:
num_runs = 1000
data_size = 100000
user system total real
NArray_rb 3.150000 0.660000 3.810000 ( 3.803652)
NArray_mod 3.930000 0.020000 3.950000 ( 3.949027)
NArray_dbl 1.040000 0.000000 1.040000 ( 1.039897)
GSL 1.670000 0.000000 1.670000 ( 1.665538)
I did the quick NArray_dbl hack, which is fast but explicitly casts to
'double', so it's not general. When I generalized it using some
internal SetFuncs of NArray, the result is slower than a ruby version
that does a double loop (two #sum calls).

What am I missing? As a success meter, I'd like it to be at least as
fast as GSL libs.

This is probably because a function (SetFuncs) is called
every step of the loop. How about using na_change_type() ?

/*
 * Weighted mean of self using argv[0] as weights.
 * Both arrays are first converted to NA_DFLOAT with na_change_type(), so
 * the loop can read plain doubles without a per-element function call.
 * Raises ArgumentError when the two arrays differ in total element count.
 * Returns a Ruby Float: sum(w[i] * v[i]) / sum(w[i]).
 */
VALUE
wmean_dbl(int argc, VALUE *argv, VALUE self) {

  int i;
  struct NARRAY *nv, *nw;
  double p_sum = 0.0, w_sum = 0.0;
  VALUE vself, varg0;

  vself = na_change_type(self, NA_DFLOAT);
  varg0 = na_change_type(argv[0], NA_DFLOAT);

  GetNArray(vself, nv);
  GetNArray(varg0, nw);

  if(nv->total != nw->total)
    rb_raise( rb_eArgError, "Vector and weight must be same size!" );

  /* each buffer must be subscripted with [i]; the product of two bare
     pointers is not valid C */
  for(i=0 ; i < nv->total ; i++) {
    p_sum += ((double *)nw->ptr)[i] * ((double *)nv->ptr)[i];
    w_sum += ((double *)nw->ptr)[i];
  }

  return rb_float_new( p_sum / w_sum);
}

Masahiro Tanaka
 
C

Cameron McBride

------=_Part_8007_29965385.1139814889253
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

Thank you for your response, Tanaka-san.

This is probably because a function (SetFuncs) is called
every step of the loop. How about using na_change_type() ?

Your suggestion is very fast when the types are matched, but if the
original is not a double, the penalty for copying the array is
significant. Also, it's incorrect if the type cannot be represented
as a float (e.g. complex). (see benchmarks below)

I appreciate the general nature on the design of NArray, and I've
learned several nice tricks from investigating it. However, the moral
I'm taking home is that if speed, efficiency and generality are all at
issue, individual routines for each NArray type are still the best way
to go at the C level.

To build extensions with one algorithm description, it seems the best
way is some macro type pseudo C code that can be parsed to generate
multiple C functions for each type. Do you agree?

This seems to justify the approach of the PDL project (perl) and the
PP language that is employed for general extensions. (and I was so
hoping to keep this at the straight forward C level to exploit the
beauty of ruby's C API. ahh well - it is just C).

Any additional comments or suggestions are welcome, I'd love to find
something I missed.

Thanks!

Cameron

----
Benchmarking based on:
num_runs = 1000
data_size = 100000
============================================================
==> INT <==
creating vectors: NArray.int(data_size).random!(100)

user system total real
NArray_rb 1.050000 0.410000 1.460000 ( 1.459850)
NArray_mod 2.670000 0.030000 2.700000 ( 2.706433)
NArray_ct 2.220000 1.600000 3.820000 ( 3.819964)
============================================================
==> SFLOAT <==
creating vectors: NArray.sfloat(data_size).random!(100)

user system total real
NArray_rb 2.590000 0.340000 2.930000 ( 2.928844)
NArray_mod 2.440000 0.020000 2.460000 ( 2.467045)
NArray_ct 2.100000 1.610000 3.710000 ( 3.715267)
============================================================
==> FLOAT <==
creating vectors: NArray.float(data_size).random!(100)

user system total real
NArray_rb 2.800000 0.800000 3.600000 ( 3.605653)
NArray_mod 3.920000 0.010000 3.930000 ( 3.934259)
NArray_ct 1.020000 0.000000 1.020000 ( 1.026064)
============================================================
==> COMPLEX <==
creating vectors: NArray.complex(data_size).random!(100)

user system total real
NArray_rb 5.970000 1.420000 7.390000 ( 7.390152)
NArray_mod 3.950000 0.020000 3.970000 ( 3.971336)
NArray_ct 2.820000 1.590000 4.410000 ( 4.411596)

------=_Part_8007_29965385.1139814889253
Content-Type: application/x-ruby; name=na_wmean.rb
Content-Transfer-Encoding: 7bit
X-Attachment-Id: f_ejmfutxc
Content-Disposition: attachment; filename="na_wmean.rb"

#!/usr/bin/env ruby

require 'rubygems'
require 'narray'
require 'inline'

# Reopens NArray to add weighted-mean methods compiled from C with RubyInline.
# Each method is called as data.method(weights) and returns a Ruby Float
# computed as sum(weights * data) / sum(weights).
class NArray

  inline do |builder|
    # Header search path for the NArray sources (machine-specific).
    builder.add_compile_flags %q(-I /export/home/cameron/sys/narray-0.5.8/)
    builder.include '"narray.h"'
    builder.include '"narray_local.h"' # few local things used in linspace

    # wmean_dbl(weights): fast path that reads both buffers directly as C
    # doubles. No type conversion is performed, so the result is only
    # meaningful when the receiver and the weights both store doubles.
    # Raises ArgumentError when no weights are given or the totals differ.
    builder.c_raw <<-'END_CODE'
      VALUE
      wmean_dbl(int argc, VALUE *argv, VALUE self) {

        int i;
        struct NARRAY *nv, *nw;
        double p_sum = 0.0, w_sum = 0.0;

        /* argv[0] is dereferenced below, so it has to exist */
        if(argc < 1)
          rb_raise( rb_eArgError, "weight vector argument is required" );

        GetNArray(self, nv);
        GetNArray(argv[0], nw);

        if(nv->total != nw->total)
          rb_raise( rb_eArgError, "Vector and weight must be same size!" );

        /* each buffer must be subscripted with [i]; the product of two
           bare pointers is not valid C */
        for(i=0 ; i < nv->total ; i++) {
          p_sum += ((double *)nw->ptr)[i] * ((double *)nv->ptr)[i];
          w_sum += ((double *)nw->ptr)[i];
        }

        return rb_float_new( p_sum / w_sum);
      }
    END_CODE

    # wmean_ct(weights): converts both arrays to NA_DFLOAT with
    # na_change_type() and then runs the same double loop as wmean_dbl.
    # The benchmarks in this thread show a significant copying penalty when
    # the inputs are not already doubles, and the result is not correct for
    # types that cannot be represented as a float (e.g. complex).
    # Raises ArgumentError when no weights are given or the totals differ.
    builder.c_raw <<-'END_CODE'
      VALUE
      wmean_ct(int argc, VALUE *argv, VALUE self) {

        int i;
        struct NARRAY *nv, *nw;
        double p_sum = 0.0, w_sum = 0.0;
        VALUE vself, varg0;

        /* argv[0] is dereferenced below, so it has to exist */
        if(argc < 1)
          rb_raise( rb_eArgError, "weight vector argument is required" );

        vself = na_change_type(self, NA_DFLOAT);
        varg0 = na_change_type(argv[0], NA_DFLOAT);

        GetNArray(vself, nv);
        GetNArray(varg0, nw);

        if(nv->total != nw->total)
          rb_raise( rb_eArgError, "Vector and weight must be same size!" );

        /* each buffer must be subscripted with [i]; the product of two
           bare pointers is not valid C */
        for(i=0 ; i < nv->total ; i++) {
          p_sum += ((double *)nw->ptr)[i] * ((double *)nv->ptr)[i];
          w_sum += ((double *)nw->ptr)[i];
        }

        return rb_float_new( p_sum / w_sum);
      }
    END_CODE

    # wmean(weights): type-generic variant. For each element it calls the
    # NArray-internal SetFuncs entry selected for (NA_DFLOAT, element type)
    # to obtain the value and the weight as doubles, then accumulates.
    # Raises ArgumentError when no weights are given or the totals differ.
    builder.c_raw <<-'END_CODE'
      VALUE
      wmean(int argc, VALUE *argv, VALUE self) {

        int i,sv,sw;
        struct NARRAY *nv, *nw;
        double wt,val;
        double p_sum = 0.0, w_sum = 0.0;
        char *v,*w;
        void (*na_getv)();
        void (*na_getw)();

        /* argv[0] is dereferenced below, so it has to exist */
        if(argc < 1)
          rb_raise( rb_eArgError, "weight vector argument is required" );

        GetNArray(self, nv);
        GetNArray(argv[0], nw);

        if(nv->total != nw->total)
          rb_raise( rb_eArgError, "Vector and weight must be same size!" );

        /* conversion routines: destination NA_DFLOAT, source = array type */
        na_getv = SetFuncs[NA_DFLOAT][nv->type];
        na_getw = SetFuncs[NA_DFLOAT][nw->type];

        /* element sizes in bytes, used to step the raw byte pointers */
        sv = na_sizeof[nv->type];
        sw = na_sizeof[nw->type];

        v = nv->ptr;
        w = nw->ptr;

        for(i=0 ; i < nv->total ; i++) {
          /* store one converted element into val / wt */
          (*na_getv)( 1, &val, 0, v, 0 );
          (*na_getw)( 1, &wt, 0, w, 0 );

          v += sv;
          w += sw;

          p_sum += (wt) * (val);
          w_sum += (wt);
        }

        return rb_float_new( p_sum / w_sum);
      }
    END_CODE
  end

end










------=_Part_8007_29965385.1139814889253
Content-Type: application/x-ruby; name=bench_wmean.rb
Content-Transfer-Encoding: 7bit
X-Attachment-Id: f_ejmfv8zn
Content-Disposition: attachment; filename="bench_wmean.rb"

#!/usr/bin/env ruby

require 'benchmark'
include Benchmark

# require 'gsl'
require 'na_wmean.rb'

# Number of times each implementation is invoked inside one benchmark report.
n = 1_000
# Number of elements in each test vector.
data_size = 100_000

# Reference weighted mean written with plain operators: the sum of the
# element-wise products xt * wt divided by the sum of the weights wt.
# Works with any objects that respond to * and sum the way NArray does.
def wmean_orig(xt, wt)
  weighted_total = (xt * wt).sum
  weight_total = wt.sum
  weighted_total / weight_total
end

# Report the run parameters once, then benchmark every weighted-mean
# implementation for each NArray element type in turn.
puts "Benchmarking based on: "
puts " num_runs = #{n}"
puts " data_size = #{data_size}"

# Declared outside the loop so the vectors stay visible after it finishes.
x = w = nil
%w[int sfloat float complex].each do |type|

  puts "=" * 60
  puts "==> #{type.upcase} <=="
  # Text shown in the log only; the vectors are built by calling the NArray
  # constructor named by type directly instead of eval-ing this string.
  create = "NArray.#{type}(data_size).random!(100)"
  puts "creating vectors: #{create}"
  puts
  x = NArray.send(type, data_size).random!(100)
  w = NArray.send(type, data_size).random!(100)
  # x = NArray.int(data_size).random!(100)
  # w = NArray.int(data_size).random!(100)

  # gx = x.to_gv
  # gw = w.to_gv

  # bmbm runs a rehearsal pass before the measured pass; 12 is the label width.
  bmbm(12) do |be|
    be.report("NArray_rb") { n.times { wmean_orig(x, w) } }
    be.report("NArray_mod") { n.times { x.wmean(w) } }
    # be.report("NArray_dbl") { n.times { m = x.wmean_dbl(w)} }
    be.report("NArray_ct") { n.times { x.wmean_ct(w) } }
    # be.report("GSL") { n.times { m = gx.wmean(gw)} }
  end
end










------=_Part_8007_29965385.1139814889253--
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,770
Messages
2,569,584
Members
45,075
Latest member
MakersCBDBloodSupport

Latest Threads

Top