J

#### jacob navia

I am doing a series of benchmarks against gcc, and I found out that the

results of my benchmark would change from version to version of gcc,

i.e. the 2.95 version would give different results than 3.4 version.

This is strange since under windows, MSVC, intel and lcc-win32

give exactly the same results...

This is not *per se* a bug, but I would like to know what

results you get. The program in question is a matrix

manipulation program that uses heavily floating point.

Just mail as an answer to this message the output you

get with a small note of machine, and gcc version (or

other compiler of course).

Thanks in advance. The program compiles under windows and linux

without modifications. I did not write this program, so please

ignore questions of style.

jacob

-----------------------------------------------cut here

/* SCCS ID @(#)ge.h 1.1 2/4/86 */

/***************************************************************

******************************************************************

**** Matrix data structure(s) for Gaussian Elimination ****

******************************************************************

****************************************************************/

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <math.h>

#ifdef __LCC__

#include <fenv.h>

#endif

double sasum(int n,double * sx,int incx );

int isamax( int n, double *sx, int incx );

/*

This file contains the definitions of the structures used in

various algorithms for doing Gaussian Elimination.

The following gives an array (of length 10) of pointers to doubles.

double *a[10];

Now assume that each a

*points to space for an array of doubles*

(gotten

by a call to malloc, say).

Then the following is true:

a

(gotten

by a call to malloc, say).

Then the following is true:

a

*can be thought of as a pointer to the i-th array of*

doubles,

*(adoubles,

*(a

*+j) is the j-th element of the i-th array.*

The following shows how to reference things for the definition of

the FULL

structure given below.

a->cd is the value of (as apposed to a pointer to) the column

dimension.

a->rd is the value of (as apposed to a pointer to) the row dimension.

a->pd[j] is a pointer to the j-th column (an array of doubles).

*(a->pd[j]+i) is the i-th element of the j-th column, viz., a(i,j).

Here we think, as is natural in C, of all matrices and vectors indexed

from 0 insead of 1.

*/

#define MAXCOL 1000 /* Maximum number of Columns. */

struct FULL { /* Struct definition for the FULL matrix

structure. */

int cd; /* Column dimension of the matrix. */

int rd; /* Row Dimension of the matrix. */

double *pd[MAXCOL]; /* Array of pointers to the columns of a

matrix. */

};

void sgesl(struct FULL * a,int * ipvt,double b[],int job );

void saxpy(int n,double sa, double *sx,int incx,double * sy,int incy );

/* The following macro will get a(r,c) from a matrix in the FULL

structure. */

#define elem(a,r,c) (*(a.pd[(c)]+(r)))

/* The following macro will get a(r,c) from a pointer to a matrix

in the FULL structure. */

#define pelem(a,r,c) (*(a->pd[(c)]+(r)))

int sgeco( struct FULL *a,int * ipvt, double *rcond,double * z );

void matgen( int n,struct FULL * a,double * b );

int sgefa(struct FULL * a,int * ipvt );

double copysign(double,double);

double second(void);

double sdot(int n,double * sx,int incx,double * sy, int incy );

/***************************************************************

******************************************************************

**** Sample driver for SGEFA/SL and CO ****

******************************************************************

****************************************************************/

/*

* SYSTEM DEPENENT ROUTINES:

* double second(); Returns cpu time used in seconds

*/

#define MAXN 500 /* Maximum problem size. */

int main()

{

/* Storage for the linear system and associates. */

struct FULL a;

double x[MAXCOL], b[MAXCOL], z[MAXCOL], rcond;

int ipvt[MAXCOL];

#ifdef __LCC__

DoublePrecision(); // Set 53 bits preision

#endif

/* Local vars. */

double opsfa, opsco, opssl, xerrnrm, resnrm, xnrm;

double tfai, tfao, tsli, tslo, tcoi, tcoo;

int n, i, j, retval;

#ifdef sun

/* The following is a kludge for correct Sun FPA core dumps. */

unsigned newmode = 62464;

fpmode_(&newmode);

#endif

memset(&a,0,sizeof(a));

for( n=50; n<=MAXN; n+=50 ) {

/* Generate the linear system. */

matgen( n, &a, b );

opsfa = 1.0E-6*((2.0e0*(double)n*(double)n*(double)n)/3.0e0 +

2.0e0*(double)n*(double)n);

opsco = opsfa + 1.0E-6*(6.0e0*(double)n*(double)n);

opssl = 1.0e-6*(2.0e0*(double)n*(double)n);

/* Factor. */

tfai = second();

retval = sgefa( &a, ipvt );

tfao = second();

if( retval )

printf("Zero Column %d found.\n", retval );

else {

/* Solve the system. */

tsli = second();

sgesl( &a, ipvt, b, 0 );

tslo = second();

/* Compute a residual to verify results. */

for( i=0; i<n; i++ )

xThe following shows how to reference things for the definition of

the FULL

structure given below.

a->cd is the value of (as apposed to a pointer to) the column

dimension.

a->rd is the value of (as apposed to a pointer to) the row dimension.

a->pd[j] is a pointer to the j-th column (an array of doubles).

*(a->pd[j]+i) is the i-th element of the j-th column, viz., a(i,j).

Here we think, as is natural in C, of all matrices and vectors indexed

from 0 insead of 1.

*/

#define MAXCOL 1000 /* Maximum number of Columns. */

struct FULL { /* Struct definition for the FULL matrix

structure. */

int cd; /* Column dimension of the matrix. */

int rd; /* Row Dimension of the matrix. */

double *pd[MAXCOL]; /* Array of pointers to the columns of a

matrix. */

};

void sgesl(struct FULL * a,int * ipvt,double b[],int job );

void saxpy(int n,double sa, double *sx,int incx,double * sy,int incy );

/* The following macro will get a(r,c) from a matrix in the FULL

structure. */

#define elem(a,r,c) (*(a.pd[(c)]+(r)))

/* The following macro will get a(r,c) from a pointer to a matrix

in the FULL structure. */

#define pelem(a,r,c) (*(a->pd[(c)]+(r)))

int sgeco( struct FULL *a,int * ipvt, double *rcond,double * z );

void matgen( int n,struct FULL * a,double * b );

int sgefa(struct FULL * a,int * ipvt );

double copysign(double,double);

double second(void);

double sdot(int n,double * sx,int incx,double * sy, int incy );

/***************************************************************

******************************************************************

**** Sample driver for SGEFA/SL and CO ****

******************************************************************

****************************************************************/

/*

* SYSTEM DEPENENT ROUTINES:

* double second(); Returns cpu time used in seconds

*/

#define MAXN 500 /* Maximum problem size. */

int main()

{

/* Storage for the linear system and associates. */

struct FULL a;

double x[MAXCOL], b[MAXCOL], z[MAXCOL], rcond;

int ipvt[MAXCOL];

#ifdef __LCC__

DoublePrecision(); // Set 53 bits preision

#endif

/* Local vars. */

double opsfa, opsco, opssl, xerrnrm, resnrm, xnrm;

double tfai, tfao, tsli, tslo, tcoi, tcoo;

int n, i, j, retval;

#ifdef sun

/* The following is a kludge for correct Sun FPA core dumps. */

unsigned newmode = 62464;

fpmode_(&newmode);

#endif

memset(&a,0,sizeof(a));

for( n=50; n<=MAXN; n+=50 ) {

/* Generate the linear system. */

matgen( n, &a, b );

opsfa = 1.0E-6*((2.0e0*(double)n*(double)n*(double)n)/3.0e0 +

2.0e0*(double)n*(double)n);

opsco = opsfa + 1.0E-6*(6.0e0*(double)n*(double)n);

opssl = 1.0e-6*(2.0e0*(double)n*(double)n);

/* Factor. */

tfai = second();

retval = sgefa( &a, ipvt );

tfao = second();

if( retval )

printf("Zero Column %d found.\n", retval );

else {

/* Solve the system. */

tsli = second();

sgesl( &a, ipvt, b, 0 );

tslo = second();

/* Compute a residual to verify results. */

for( i=0; i<n; i++ )

x

*= b**;*

matgen( n, &a, b );

for( j=0; j<n; j++ ) {

register double *col = a.pd[j];

register double xj = x[j];

for( i=0; i<n; i++ ) {

bmatgen( n, &a, b );

for( j=0; j<n; j++ ) {

register double *col = a.pd[j];

register double xj = x[j];

for( i=0; i<n; i++ ) {

b

*-= col***xj;*

}

}

xerrnrm = 0.0;

resnrm = 0.0;

for( i=0; i<n; i++ ) {

xerrnrm += ((double)x}

}

xerrnrm = 0.0;

resnrm = 0.0;

for( i=0; i<n; i++ ) {

xerrnrm += ((double)x

*-1.0)*((double)x**-1.0);*

resnrm += (double)bresnrm += (double)b

**(double)b**;*

}

xnrm = sqrt( (double)n );

xerrnrm = sqrt( xerrnrm )/xnrm;

resnrm = sqrt( resnrm )/xnrm;

/* Now factor and est condition number. */

tcoi = second();

retval = sgeco( &a, ipvt, &rcond, z );

tcoo = second();

printf("\n\n\tLinpack Benchmark in C of size %d\n",n);

printf(" 1/COND(A) (Condition number of A) = %15e\n",rcond);

printf(" ||x-X||/||X|| (Solution Error) = %15e\n",xerrnrm);

printf(" ||b-Ax||/||X|| (Residual Error) = %15e\n",resnrm);

printf("\tWhere X = True solution and and x = computed solution\n");

}

}

return 0;

}

void matgen(int n,struct FULL * a,double * b )

{

/* This routine generates a struct FULL matrix */

int i, j, init = 1325;

/* Allocate/Deallocate space for the columns. */

for( i=0; i<n; i++ ) {

if( a->pd}

xnrm = sqrt( (double)n );

xerrnrm = sqrt( xerrnrm )/xnrm;

resnrm = sqrt( resnrm )/xnrm;

/* Now factor and est condition number. */

tcoi = second();

retval = sgeco( &a, ipvt, &rcond, z );

tcoo = second();

printf("\n\n\tLinpack Benchmark in C of size %d\n",n);

printf(" 1/COND(A) (Condition number of A) = %15e\n",rcond);

printf(" ||x-X||/||X|| (Solution Error) = %15e\n",xerrnrm);

printf(" ||b-Ax||/||X|| (Residual Error) = %15e\n",resnrm);

printf("\tWhere X = True solution and and x = computed solution\n");

}

}

return 0;

}

void matgen(int n,struct FULL * a,double * b )

{

/* This routine generates a struct FULL matrix */

int i, j, init = 1325;

/* Allocate/Deallocate space for the columns. */

for( i=0; i<n; i++ ) {

if( a->pd

*!= NULL ) (void)free( a->pd**);*

if( (a->pdif( (a->pd

*= (double *)malloc( (unsigned)n*sizeof(double) ))*

== NULL ) {

fprintf(stderr, "MATGEN: Error allocating matrix for

n=%d\n",n);

exit( 1 );

}

}

/* Set the matrix elements and right hand side. */

a->cd = n;

a->rd = n;

for( j=0; j<n; j++ ) {

register double *col = a->pd[j];

for( i=0; i<n; i++ ) {

init = 3125*init % 65536;

col== NULL ) {

fprintf(stderr, "MATGEN: Error allocating matrix for

n=%d\n",n);

exit( 1 );

}

}

/* Set the matrix elements and right hand side. */

a->cd = n;

a->rd = n;

for( j=0; j<n; j++ ) {

register double *col = a->pd[j];

for( i=0; i<n; i++ ) {

init = 3125*init % 65536;

col

*= ((double)init-32768.0)/16384.0;*

}

}

for( j=0; j<n; j++ ) b[j] = 0.0;

for( j=0; j<n; j++ ) {

register double *col = a->pd[j];

for( i=0; i<n; i++ ) {

b}

}

for( j=0; j<n; j++ ) b[j] = 0.0;

for( j=0; j<n; j++ ) {

register double *col = a->pd[j];

for( i=0; i<n; i++ ) {

b

*+= col**;*

}

}

}

/****************************************************************************

* Routines for getting elapsed CPU usage.

****************************************************************************/

#include <time.h>

static int epsilon=0;

double second(void)

/****************************************************************************

* Returns the total cpu time used in seconds.

* Emulates the Cray fortran library function of the same name.

****************************************************************************/

{

epsilon++;

return( (double)epsilon + ((double)clock())/1000.0 );

}

void sscal(int n,double sa,double * sx,int incx );

/***************************************************************

******************************************************************

**** Gaussian Elimination with partial pivoting ****

**** and condition number estimation. ****

**** This file contains the factorization driver and ****

**** contion number estimation routine SGECO ****

******************************************************************

****************************************************************/

int sgeco(struct FULL * a,int * ipvt,double * rcond,double * z )

/*

PURPOSE

SGECO factors a real matrix by gaussian elimination

and estimates the condition of the matrix.

REMARKS

If rcond is not needed, SGEFA is slightly faster.

to solve A*x = b , follow SGECO by SGESL.

To compute inverse(A)*c , follow SGECO by SGESL.

To compute determinant(A) , follow SGECO by SGEDI.

To compute inverse(A) , follow SGECO by SGEDI.

INPUT

a A pointer to the FULL matrix structure.

See the definition in ge.h.

OUTPUT

a A pointer to the FULL matrix structure containing

an upper triangular matrix and the multipliers

which were used to obtain it.

The factorization can be written a = l*u where

l is a product of permutation and unit lower

triangular matrices and u is upper triangular.

ipvt An integer vector (of length a->cd) of pivot indices.

rcond A double estimate of the reciprocal condition of A .

for the system A*x = b , relative perturbations

in A and b of size epsilon may cause

relative perturbations in x of size epsilon/rcond .

If rcond is so small that the logical expression

1.0 + rcond .eq. 1.0

is true, then a may be singular to working

precision. In particular, rcond is zero if

exact singularity is detected or the estimate

underflows.

z A double vector (of length a->cd) for a work vector

whose contents are usually unimportant. If A is

close to a singular matrix, then z is an approx-

imate null vector in the sense that:

norm(a*z) = rcond*norm(a)*norm(z) .

RETURNS

= -1 Matrix is not square.

= 0 Normal return value.

= k if u(k,k) .eq. 0.0 . This is not an error

condition for this subroutine, but it does

indicate that sgesl or sgedi will divide by zero

if called. Use rcond in sgeco for a reliable

indication of singularity.

ROUTINES

SGEFA(), blas sasum() and sdot(), copysign(), fabs();

WARNINGS

This routine uses the UN*X math library routines

copysign() and fabs().

*/

{

register int j;

int k, n, info ;

register double s;

double ek, anorm, ynorm;

n = a->cd;

/* Compute 1-norm of A. */

for( j=0, anorm=0.0; j<n; j++ ) {

register double sum;

sum = sasum( n, a->pd[j], 1 );

anorm = (double)sum > anorm ? (double)sum : anorm;

}

/* Factor A. */

info = sgefa( a, ipvt );

/*

* rcond = 1/(norm(a)*(estimate of norm(inverse(a)))) .

* estimate = norm(z)/norm(y) where a*z = y and trans(a)*y = e .

* trans(a) is the transpose of a . The components of e are

* chosen to cause maximum local growth in the elements of w where

* trans(u)*w = e . The vectors are frequently rescaled to avoid

* overflow.

*/

/* solve trans(u)*w = e */

ek = 1.0;

for( j=0; j<n; j++ ) {

z[j] = 0.0e0;

}

for( k=0; k<n; k++ ) {

register double zk = z[k];

double wk, wkm, sm;

int kp1 = k+1;

if( zk != 0.0 ) ek = (double)copysign( (double)ek, (double)-zk );

if( fabs( (double)(ek-zk) ) > fabs( (double)pelem(a,k,k) ) ) {

s = (double)fabs( (double)pelem(a,k,k) )/(double)fabs(

(double)(ek-zk) );

sscal( n, (double)s, z, 1 );

zk = z[k];

ek *= s;

}

wk = ek - zk;

wkm = -ek - zk;

s = (double)fabs( (double)wk );

sm = (double)fabs( (double)wkm );

if( pelem(a,k,k) == 0.0 ) {

wk = 1.0;

wkm = 1.0;

}

else {

wk /= pelem(a,k,k);

wkm /= pelem(a,k,k);

}

if( kp1<n ) {

for( j=kp1; j<n; j++ ) {

sm = sm + (double)fabs( (double)(z[j]+wkm*pelem(a,k,j)) );

z[j] += ((double)wk)*pelem(a,k,j);

s += (double)fabs( (double)z[j] );

}

if( s < sm ) {

register double t = wkm-wk;

wk = wkm;

for( j=kp1; j<n; j++ ) {

z[j] += t*pelem(a,k,j);

}

}

}

z[k] = wk;

}

s = 1.0/(double)sasum( n, z, 1 );

sscal( n, (double)s, z, 1 );

/* Solve trans(L)*y = w. */

for( k=n-1; k>=0; k-- ) {

register int l;

register double t;

if( k < n-1 ) z[k] += (double)sdot( n-k-1, a->pd[k]+k+1, 1,

(z+k+1), 1 );

if( fabs( (double)z[k] ) > 1.0 ) {

s = 1.0/(double)fabs( (double)z[k] );

sscal( n, (double)s, z, 1 );

}

l = ipvt[k];

t = z[l];

z[l] = z[k];

z[k] = t;

}

s = 1.0/(double)sasum( n, z, 1 );

sscal( n, (double)s, z, 1);

ynorm = 1.0;

/* solve L*v = y. */

for( k=0; k<n; k++ ) {

register int l;

register double t;

l = ipvt[k];

t = z[l];

z[l] = z[k];

z[k] = t;

if( k < n-1 ) saxpy( n-k-1, (double)t, (a->pd[k]+k+1), 1,

(z+k+1), 1 );

if( fabs( (double)z[k] ) > 1.0) {

s = 1.0/(double)fabs( (double)z[k] );

sscal( n, (double)s, z, 1 );

ynorm *= s;

}

}

s = 1.0/(double)sasum( n, z, 1 );

sscal( n, (double)s, z, 1 );

ynorm *= s;

/* Solve U*z = v. */

for( k=n-1; k>=0; k-- ) {

register double t;

if( fabs( (double)z[k] ) > fabs( (double)pelem(a,k,k) ) ) {

s = (double)fabs( (double)pelem(a,k,k) )/(double)fabs(

(double)z[k] );

sscal( n, (double)s, z, 1 );

ynorm *= s;

}

if( pelem(a,k,k) == 0.0 ) z[k] = 1.0;

else z[k] /= pelem(a,k,k);

t = -z[k];

saxpy( k, (double)t, a->pd[k], 1, z, 1 );

}

/* Make znorm = 1.0. */

s = 1.0/(double)sasum( n, z, 1 );

sscal( n, (double)s, z, 1 );

ynorm *= s;

if( anorm == 0.0e0) *rcond = 0.0;

else *rcond = ynorm/anorm;

return( info );

}

/***************************************************************

******************************************************************

**** Gaussian Elimination with partial pivoting. ****

**** This file contains the factorization routine SGEFA ****

******************************************************************

****************************************************************/

int sgefa(struct FULL *a,int * ipvt )

/*

PURPOSE

SGEFA factors a real matrix by gaussian elimination.

REMARKS

SGEFA is usually called by SGECO, but it can be called

directly with a saving in time if rcond is not needed.

(time for SGECO) = (1 + 9/n)*(time for SGEFA) .

INPUT

a A pointer to the FULL matrix structure.

See the definition in ge.h.

OUTPUT

a A pointer to the FULL matrix structure containing

an upper triangular matrix and the multipliers

which were used to obtain it.

The factorization can be written a = l*u where

l is a product of permutation and unit lower

triangular matrices and u is upper triangular.

ipvt An integer vector (of length a->cd) of pivot indices.

RETURNS

= -1 Matrix is not square.

= 0 Normal return value.

= k if u(k,k) .eq. 0.0 . This is not an error

condition for this subroutine, but it does

indicate that sgesl or sgedi will divide by zero

if called. Use rcond in sgeco for a reliable

indication of singularity.

ROUTINES

blas ISAMAX

*/

{

register int i, j;

int isamax(), k, l, nm1, info, n;

double *akk, *alk;

register double t, *mik;

/* Gaussian elimination with partial pivoting. */

if( a->cd != a->rd ) return( -1 );

n = a->cd;

nm1 = n - 1;

akk = a->pd[0];

info = 0; /* Assume nothing will go wrong! */

if( n < 2 ) goto CLEAN_UP;

/* Loop over Diagional */

for( k=0; k<nm1; k++, ipvt++ ) {

/* Find index of max elem in col below the diagional (l = pivot

index). */

akk = a->pd[k] + k;

l = isamax( n-k, akk, 1 ) + k;

*ipvt = l;

/* Zero pivot implies this column already triangularized. */

alk = a->pd[k] + l;

if( *alk == 0.0e0) {

info = k;

continue;

}

/* Interchange a(k,k) and a(l,k) if necessary. */

if( l != k ) {

t = *alk;

*alk = *akk;

*akk = t;

}

/* Compute multipliers for this column. */

t = -1.0e0 / (*akk);

for( i=k+1, mik=a->pd[k]; i<n; i++ )

mik}

}

}

/****************************************************************************

* Routines for getting elapsed CPU usage.

****************************************************************************/

#include <time.h>

static int epsilon=0;

double second(void)

/****************************************************************************

* Returns the total cpu time used in seconds.

* Emulates the Cray fortran library function of the same name.

****************************************************************************/

{

epsilon++;

return( (double)epsilon + ((double)clock())/1000.0 );

}

void sscal(int n,double sa,double * sx,int incx );

/***************************************************************

******************************************************************

**** Gaussian Elimination with partial pivoting ****

**** and condition number estimation. ****

**** This file contains the factorization driver and ****

**** contion number estimation routine SGECO ****

******************************************************************

****************************************************************/

int sgeco(struct FULL * a,int * ipvt,double * rcond,double * z )

/*

PURPOSE

SGECO factors a real matrix by gaussian elimination

and estimates the condition of the matrix.

REMARKS

If rcond is not needed, SGEFA is slightly faster.

to solve A*x = b , follow SGECO by SGESL.

To compute inverse(A)*c , follow SGECO by SGESL.

To compute determinant(A) , follow SGECO by SGEDI.

To compute inverse(A) , follow SGECO by SGEDI.

INPUT

a A pointer to the FULL matrix structure.

See the definition in ge.h.

OUTPUT

a A pointer to the FULL matrix structure containing

an upper triangular matrix and the multipliers

which were used to obtain it.

The factorization can be written a = l*u where

l is a product of permutation and unit lower

triangular matrices and u is upper triangular.

ipvt An integer vector (of length a->cd) of pivot indices.

rcond A double estimate of the reciprocal condition of A .

for the system A*x = b , relative perturbations

in A and b of size epsilon may cause

relative perturbations in x of size epsilon/rcond .

If rcond is so small that the logical expression

1.0 + rcond .eq. 1.0

is true, then a may be singular to working

precision. In particular, rcond is zero if

exact singularity is detected or the estimate

underflows.

z A double vector (of length a->cd) for a work vector

whose contents are usually unimportant. If A is

close to a singular matrix, then z is an approx-

imate null vector in the sense that:

norm(a*z) = rcond*norm(a)*norm(z) .

RETURNS

= -1 Matrix is not square.

= 0 Normal return value.

= k if u(k,k) .eq. 0.0 . This is not an error

condition for this subroutine, but it does

indicate that sgesl or sgedi will divide by zero

if called. Use rcond in sgeco for a reliable

indication of singularity.

ROUTINES

SGEFA(), blas sasum() and sdot(), copysign(), fabs();

WARNINGS

This routine uses the UN*X math library routines

copysign() and fabs().

*/

{

register int j;

int k, n, info ;

register double s;

double ek, anorm, ynorm;

n = a->cd;

/* Compute 1-norm of A. */

for( j=0, anorm=0.0; j<n; j++ ) {

register double sum;

sum = sasum( n, a->pd[j], 1 );

anorm = (double)sum > anorm ? (double)sum : anorm;

}

/* Factor A. */

info = sgefa( a, ipvt );

/*

* rcond = 1/(norm(a)*(estimate of norm(inverse(a)))) .

* estimate = norm(z)/norm(y) where a*z = y and trans(a)*y = e .

* trans(a) is the transpose of a . The components of e are

* chosen to cause maximum local growth in the elements of w where

* trans(u)*w = e . The vectors are frequently rescaled to avoid

* overflow.

*/

/* solve trans(u)*w = e */

ek = 1.0;

for( j=0; j<n; j++ ) {

z[j] = 0.0e0;

}

for( k=0; k<n; k++ ) {

register double zk = z[k];

double wk, wkm, sm;

int kp1 = k+1;

if( zk != 0.0 ) ek = (double)copysign( (double)ek, (double)-zk );

if( fabs( (double)(ek-zk) ) > fabs( (double)pelem(a,k,k) ) ) {

s = (double)fabs( (double)pelem(a,k,k) )/(double)fabs(

(double)(ek-zk) );

sscal( n, (double)s, z, 1 );

zk = z[k];

ek *= s;

}

wk = ek - zk;

wkm = -ek - zk;

s = (double)fabs( (double)wk );

sm = (double)fabs( (double)wkm );

if( pelem(a,k,k) == 0.0 ) {

wk = 1.0;

wkm = 1.0;

}

else {

wk /= pelem(a,k,k);

wkm /= pelem(a,k,k);

}

if( kp1<n ) {

for( j=kp1; j<n; j++ ) {

sm = sm + (double)fabs( (double)(z[j]+wkm*pelem(a,k,j)) );

z[j] += ((double)wk)*pelem(a,k,j);

s += (double)fabs( (double)z[j] );

}

if( s < sm ) {

register double t = wkm-wk;

wk = wkm;

for( j=kp1; j<n; j++ ) {

z[j] += t*pelem(a,k,j);

}

}

}

z[k] = wk;

}

s = 1.0/(double)sasum( n, z, 1 );

sscal( n, (double)s, z, 1 );

/* Solve trans(L)*y = w. */

for( k=n-1; k>=0; k-- ) {

register int l;

register double t;

if( k < n-1 ) z[k] += (double)sdot( n-k-1, a->pd[k]+k+1, 1,

(z+k+1), 1 );

if( fabs( (double)z[k] ) > 1.0 ) {

s = 1.0/(double)fabs( (double)z[k] );

sscal( n, (double)s, z, 1 );

}

l = ipvt[k];

t = z[l];

z[l] = z[k];

z[k] = t;

}

s = 1.0/(double)sasum( n, z, 1 );

sscal( n, (double)s, z, 1);

ynorm = 1.0;

/* solve L*v = y. */

for( k=0; k<n; k++ ) {

register int l;

register double t;

l = ipvt[k];

t = z[l];

z[l] = z[k];

z[k] = t;

if( k < n-1 ) saxpy( n-k-1, (double)t, (a->pd[k]+k+1), 1,

(z+k+1), 1 );

if( fabs( (double)z[k] ) > 1.0) {

s = 1.0/(double)fabs( (double)z[k] );

sscal( n, (double)s, z, 1 );

ynorm *= s;

}

}

s = 1.0/(double)sasum( n, z, 1 );

sscal( n, (double)s, z, 1 );

ynorm *= s;

/* Solve U*z = v. */

for( k=n-1; k>=0; k-- ) {

register double t;

if( fabs( (double)z[k] ) > fabs( (double)pelem(a,k,k) ) ) {

s = (double)fabs( (double)pelem(a,k,k) )/(double)fabs(

(double)z[k] );

sscal( n, (double)s, z, 1 );

ynorm *= s;

}

if( pelem(a,k,k) == 0.0 ) z[k] = 1.0;

else z[k] /= pelem(a,k,k);

t = -z[k];

saxpy( k, (double)t, a->pd[k], 1, z, 1 );

}

/* Make znorm = 1.0. */

s = 1.0/(double)sasum( n, z, 1 );

sscal( n, (double)s, z, 1 );

ynorm *= s;

if( anorm == 0.0e0) *rcond = 0.0;

else *rcond = ynorm/anorm;

return( info );

}

/***************************************************************

******************************************************************

**** Gaussian Elimination with partial pivoting. ****

**** This file contains the factorization routine SGEFA ****

******************************************************************

****************************************************************/

int sgefa(struct FULL *a,int * ipvt )

/*

PURPOSE

SGEFA factors a real matrix by gaussian elimination.

REMARKS

SGEFA is usually called by SGECO, but it can be called

directly with a saving in time if rcond is not needed.

(time for SGECO) = (1 + 9/n)*(time for SGEFA) .

INPUT

a A pointer to the FULL matrix structure.

See the definition in ge.h.

OUTPUT

a A pointer to the FULL matrix structure containing

an upper triangular matrix and the multipliers

which were used to obtain it.

The factorization can be written a = l*u where

l is a product of permutation and unit lower

triangular matrices and u is upper triangular.

ipvt An integer vector (of length a->cd) of pivot indices.

RETURNS

= -1 Matrix is not square.

= 0 Normal return value.

= k if u(k,k) .eq. 0.0 . This is not an error

condition for this subroutine, but it does

indicate that sgesl or sgedi will divide by zero

if called. Use rcond in sgeco for a reliable

indication of singularity.

ROUTINES

blas ISAMAX

*/

{

register int i, j;

int isamax(), k, l, nm1, info, n;

double *akk, *alk;

register double t, *mik;

/* Gaussian elimination with partial pivoting. */

if( a->cd != a->rd ) return( -1 );

n = a->cd;

nm1 = n - 1;

akk = a->pd[0];

info = 0; /* Assume nothing will go wrong! */

if( n < 2 ) goto CLEAN_UP;

/* Loop over Diagional */

for( k=0; k<nm1; k++, ipvt++ ) {

/* Find index of max elem in col below the diagional (l = pivot

index). */

akk = a->pd[k] + k;

l = isamax( n-k, akk, 1 ) + k;

*ipvt = l;

/* Zero pivot implies this column already triangularized. */

alk = a->pd[k] + l;

if( *alk == 0.0e0) {

info = k;

continue;

}

/* Interchange a(k,k) and a(l,k) if necessary. */

if( l != k ) {

t = *alk;

*alk = *akk;

*akk = t;

}

/* Compute multipliers for this column. */

t = -1.0e0 / (*akk);

for( i=k+1, mik=a->pd[k]; i<n; i++ )

mik

**= t;*

/* Column elimination with row indexing. */

if( l != k ) {

/* Interchange a(k,j) and a(l,j) if necessary. */

for( j=k+1; j<n; j++ ) {

t = pelem(a,k,j);

pelem(a,k,j) = pelem(a,l,j);

pelem(a,l,j) = t;

}

}

for( j=k+1; j<n; j++ ) {

register double *aij = a->pd[j];

t = pelem(a,k,j);

for( i=k+1, mik=a->pd[k]; i<n; i++ )

aij/* Column elimination with row indexing. */

if( l != k ) {

/* Interchange a(k,j) and a(l,j) if necessary. */

for( j=k+1; j<n; j++ ) {

t = pelem(a,k,j);

pelem(a,k,j) = pelem(a,l,j);

pelem(a,l,j) = t;

}

}

for( j=k+1; j<n; j++ ) {

register double *aij = a->pd[j];

t = pelem(a,k,j);

for( i=k+1, mik=a->pd[k]; i<n; i++ )

aij

*+= t*mik**;*

}

} /* End of for k loop */

CLEAN_UP:

*ipvt = nm1;

if( *akk == 0.0e0 ) info = n;

return( info );

}

/***************************************************************

******************************************************************

**** Gaussian Elimination with partial pivoting. ****

**** This file contains the solution routine SGESL ****

******************************************************************

****************************************************************/

void sgesl(struct FULL * a,int * ipvt,double b[],int job )

/*

PURPOSE

SGESL solves the real system

a * x = b or trans(a) * x = b

using the factors computed by SGECO or SGEFA.

INPUT

a A pointer to the FULL matrix structure containing the

factored

matrix. See the definition of FULL in ge.h.

ipvt The pivot vector (of length a->cd) from SGECO or SGEFA.

b The right hand side vector (of length a->cd).

job = 0 to solve a*x = b ,

= nonzero to solve trans(a)*x = b where

trans(a) is the transpose.

OUTPUT

b The solution vector x.

REMARKS

Error condition:

A division by zero will occur if the input factor contains a

zero on the diagonal. Technically this indicates singularity

but it is often caused by improper arguments or improper

setting of lda . It will not occur if the subroutines are

called correctly and if sgeco has set rcond .gt. 0.0

or sgefa has set info .eq. 0 .

*/

{

register double t, *mik;

double *akk;

register int i, k;

int l, n, nm1;

n = a->cd;

nm1 = n - 1;

/* job = 0 , solve A * x = b. */

if( job == 0 ) {

/* Forward elimination. Solve L*y = b. */

for( k=0; k<nm1; k++ ) {

akk = a->pd[k] + k; /* akk points to a(k,k). */

/* Interchange b[k] and b[l] if necessary. */

l = ipvt[k];

t = b[l];

if( l != k ) {

b[l] = b[k];

b[k] = t;

}

for( i=k+1, mik=a->pd[k]; i<n; i++ )

b}

} /* End of for k loop */

CLEAN_UP:

*ipvt = nm1;

if( *akk == 0.0e0 ) info = n;

return( info );

}

/***************************************************************

******************************************************************

**** Gaussian Elimination with partial pivoting. ****

**** This file contains the solution routine SGESL ****

******************************************************************

****************************************************************/

void sgesl(struct FULL * a,int * ipvt,double b[],int job )

/*

PURPOSE

SGESL solves the real system

a * x = b or trans(a) * x = b

using the factors computed by SGECO or SGEFA.

INPUT

a A pointer to the FULL matrix structure containing the

factored

matrix. See the definition of FULL in ge.h.

ipvt The pivot vector (of length a->cd) from SGECO or SGEFA.

b The right hand side vector (of length a->cd).

job = 0 to solve a*x = b ,

= nonzero to solve trans(a)*x = b where

trans(a) is the transpose.

OUTPUT

b The solution vector x.

REMARKS

Error condition:

A division by zero will occur if the input factor contains a

zero on the diagonal. Technically this indicates singularity

but it is often caused by improper arguments or improper

setting of lda . It will not occur if the subroutines are

called correctly and if sgeco has set rcond .gt. 0.0

or sgefa has set info .eq. 0 .

*/

{

register double t, *mik;

double *akk;

register int i, k;

int l, n, nm1;

n = a->cd;

nm1 = n - 1;

/* job = 0 , solve A * x = b. */

if( job == 0 ) {

/* Forward elimination. Solve L*y = b. */

for( k=0; k<nm1; k++ ) {

akk = a->pd[k] + k; /* akk points to a(k,k). */

/* Interchange b[k] and b[l] if necessary. */

l = ipvt[k];

t = b[l];

if( l != k ) {

b[l] = b[k];

b[k] = t;

}

for( i=k+1, mik=a->pd[k]; i<n; i++ )

b

*+= t*mik**;*

}

/* Back substitution. Solve U*x = y. */

for( k=nm1; k>=0; k-- ) {

register double *uik = a->pd[k];

akk = uik + k;

b[k] /= (*akk);

for( i=0; i<k; i++ )

b}

/* Back substitution. Solve U*x = y. */

for( k=nm1; k>=0; k-- ) {

register double *uik = a->pd[k];

akk = uik + k;

b[k] /= (*akk);

for( i=0; i<k; i++ )

b

*-= uik***b[k];*

}

return;

}

/* job = nonzero. Solve trans(A) * x = b. */

/* First solve trans(U)*y = b. */

for( k=0; k<n; k++ ) {

register double *uik = a->pd[k];

akk = uik + k;

t = 0.0;

for( i=0; i<k; i++ )

t += uik}

return;

}

/* job = nonzero. Solve trans(A) * x = b. */

/* First solve trans(U)*y = b. */

for( k=0; k<n; k++ ) {

register double *uik = a->pd[k];

akk = uik + k;

t = 0.0;

for( i=0; i<k; i++ )

t += uik

**b**;*

b[k] = (b[k] - t) / (*akk);

}

/* b now contains y. */

/* Solve trans(L)*x = y. */

for( k=n-2; k>=0; k-- ) {

mik = a->pd[k];

t = 0.0;

for( i=k+1; i<n; i++ )

t += mikb[k] = (b[k] - t) / (*akk);

}

/* b now contains y. */

/* Solve trans(L)*x = y. */

for( k=n-2; k>=0; k-- ) {

mik = a->pd[k];

t = 0.0;

for( i=k+1; i<n; i++ )

t += mik

**b**;*

b[k] += t;

/* Interchange b(k) and b(ipvt(k)) if necessary. */

l = ipvt[k];

if( l == k ) continue;

t = b[l];

b[l] = b[k];

b[k] = t;

}

return;

}

/***************************************************************

*****************************************************************

*******************************************************************

***** *****

***** BLAS *****

***** Basic Linear Algebra Subroutines *****

***** Written in the C Programming Language. *****

***** *****

***** Functions include: *****

***** isamax, sasum, saxpy, scopy, sdot, snrm2, *****

***** *****

***** In addition a few other routines are included: *****

***** vexopy, vfill *****

***** *****

***** If your 3M library does not have the copysign function *****

***** then compile this file with -DCOPYSIGN and one will be *****

***** be supplied. *****

*******************************************************************

*****************************************************************

***************************************************************/

#ifndef HUGEsp

#define HUGEsp 1.0e+38

#endif

#ifndef SMALLsp

#define SMALLsp 1.0e-45

#endif

int isamax(int n, double *sx,int incx )

/*

PURPOSE

Finds the index of element having max. absolute value.

INPUT

n Number of elements to check.

sx Vector to be checked.

incx Every incx-th element is checked.

*/

{

register double smax = 0.0e0;

register int i, istmp = 0;

#ifndef abs

#define abs(x) ((x)<0.0?-(x)x))

#endif

#ifdef alliant

int isamax_();

istmp = isamax_( &n, sx, &incx )-1; /* Remember 0 is first. */

return( istmp );

#else

if( n <= 1 ) return( istmp );

if( incx != 1 ) {

/* Code for increment not equal to 1. */

if( incx < 0 ) sx = sx + ((-n+1)*incx + 1);

istmp = 0;

smax = abs( *sx );

sx += incx;

for( i=1; i<n; i++, sx+=incx )

if( abs( *sx ) > smax ) {

istmp = i;

smax = abs( *sx );

}

return( istmp );

}

/* Code for increment equal to 1. */

istmp = 0;

smax = abs(*sx);

sx++;

for( i=1; i<n; i++, sx++ )

if( abs( *sx ) > smax ) {

istmp = i;

smax = abs( *sx );

}

return( istmp );

#endif

}

double sasum(int n,double * sx,int incx )

/*

PURPOSE

Returns sum of magnitudes of single precision SX.

sasum = sum from 0 to n-1 of ABS(SX(1+I*INCX))

INPUT

n Number of elements to multiply.

sx Pointer to double vector to take abs sum of.

incx Storage incrament for sx.

RETURNS

sasum Double variable with the result.

WARNINGS

This routine uses the UN*X math library function fabs().

*/

{

#ifndef abs

#define abs(x) ((x)<0.0?-(x)x))

#endif

register double sum = 0.0;

if( n<= 0 ) return( sum );

if( incx == 1 ) {

register int i, m;

/* Code for increments equal to 1. */

/* Clean-up loop so remaining vector length is a multiple of 6. */

m = n % 6;

if( m != 0 ) {

for( i=0; i<m; i++ )

sum += abs( sxb[k] += t;

/* Interchange b(k) and b(ipvt(k)) if necessary. */

l = ipvt[k];

if( l == k ) continue;

t = b[l];

b[l] = b[k];

b[k] = t;

}

return;

}

/***************************************************************

*****************************************************************

*******************************************************************

***** *****

***** BLAS *****

***** Basic Linear Algebra Subroutines *****

***** Written in the C Programming Language. *****

***** *****

***** Functions include: *****

***** isamax, sasum, saxpy, scopy, sdot, snrm2, *****

***** *****

***** In addition a few other routines are included: *****

***** vexopy, vfill *****

***** *****

***** If your 3M library does not have the copysign function *****

***** then compile this file with -DCOPYSIGN and one will be *****

***** be supplied. *****

*******************************************************************

*****************************************************************

***************************************************************/

#ifndef HUGEsp

#define HUGEsp 1.0e+38

#endif

#ifndef SMALLsp

#define SMALLsp 1.0e-45

#endif

int isamax(int n, double *sx,int incx )

/*

PURPOSE

Finds the index of element having max. absolute value.

INPUT

n Number of elements to check.

sx Vector to be checked.

incx Every incx-th element is checked.

*/

{

register double smax = 0.0e0;

register int i, istmp = 0;

#ifndef abs

#define abs(x) ((x)<0.0?-(x)x))

#endif

#ifdef alliant

int isamax_();

istmp = isamax_( &n, sx, &incx )-1; /* Remember 0 is first. */

return( istmp );

#else

if( n <= 1 ) return( istmp );

if( incx != 1 ) {

/* Code for increment not equal to 1. */

if( incx < 0 ) sx = sx + ((-n+1)*incx + 1);

istmp = 0;

smax = abs( *sx );

sx += incx;

for( i=1; i<n; i++, sx+=incx )

if( abs( *sx ) > smax ) {

istmp = i;

smax = abs( *sx );

}

return( istmp );

}

/* Code for increment equal to 1. */

istmp = 0;

smax = abs(*sx);

sx++;

for( i=1; i<n; i++, sx++ )

if( abs( *sx ) > smax ) {

istmp = i;

smax = abs( *sx );

}

return( istmp );

#endif

}

double sasum(int n,double * sx,int incx )

/*

PURPOSE

Returns sum of magnitudes of single precision SX.

sasum = sum from 0 to n-1 of ABS(SX(1+I*INCX))

INPUT

n Number of elements to multiply.

sx Pointer to double vector to take abs sum of.

incx Storage incrament for sx.

RETURNS

sasum Double variable with the result.

WARNINGS

This routine uses the UN*X math library function fabs().

*/

{

#ifndef abs

#define abs(x) ((x)<0.0?-(x)x))

#endif

register double sum = 0.0;

if( n<= 0 ) return( sum );

if( incx == 1 ) {

register int i, m;

/* Code for increments equal to 1. */

/* Clean-up loop so remaining vector length is a multiple of 6. */

m = n % 6;

if( m != 0 ) {

for( i=0; i<m; i++ )

sum += abs( sx

*);*

if( n < 6 ) return( (double)sum );

}

for( i=m; i<n; i+=6 ) {

sum += abs( sxif( n < 6 ) return( (double)sum );

}

for( i=m; i<n; i+=6 ) {

sum += abs( sx

*) + abs( sx[i+1]) + abs( sx[i+2] ) +*

abs( sx[i+3] ) + abs( sx[i+4] ) + abs( sx[i+5] );

}

return( (double)sum );

}

else {

register int i, ns;

/* Code for increments not equal to 1. */

ns = n*incx;

for( i=0; i<ns; i+=incx ) {

sum += abs( sxabs( sx[i+3] ) + abs( sx[i+4] ) + abs( sx[i+5] );

}

return( (double)sum );

}

else {

register int i, ns;

/* Code for increments not equal to 1. */

ns = n*incx;

for( i=0; i<ns; i+=incx ) {

sum += abs( sx

*);*

}

return( (double)sum );

}

}

void saxpy(int n,double sa, double *sx,int incx,double * sy,int incy )

/*

PURPOSE

Vector times a scalar plus a vector. sy = sy + sa*sx.

INPUT

n Number of elements to multiply.

sa Scalar to multiply by (note that this is a double).

sx Pointer to double vector to scale.

incx Storage incrament for sx.

sy Pointer to double vector to add.

incy Storage incrament for sy.

OUTPUT

sy sy = sy + sa*sx

*/

{

register int i;

register double ssa = (double)sa;

if( n<=0 || ssa==0.0 ) return;

if( incx == incy ) {

if( incx == 1 ) {

/* Both increments = 1 */

for( i=0; i<n; i++ )

sy}

return( (double)sum );

}

}

void saxpy(int n,double sa, double *sx,int incx,double * sy,int incy )

/*

PURPOSE

Vector times a scalar plus a vector. sy = sy + sa*sx.

INPUT

n Number of elements to multiply.

sa Scalar to multiply by (note that this is a double).

sx Pointer to double vector to scale.

incx Storage incrament for sx.

sy Pointer to double vector to add.

incy Storage incrament for sy.

OUTPUT

sy sy = sy + sa*sx

*/

{

register int i;

register double ssa = (double)sa;

if( n<=0 || ssa==0.0 ) return;

if( incx == incy ) {

if( incx == 1 ) {

/* Both increments = 1 */

for( i=0; i<n; i++ )

sy

*+= ssa*sx**;*

return;

}

if( incx>0 ) {

/* Equal, positive, non-unit increments. */

for( i=0; i<n; i++,sx+=incx,sy+=incx )

*sy += ssa*(*sx);

return;

}

}

/* Unequal or negative increments. */

if( incx < 0 ) sx += ((-n+1)*incx + 1);

if( incy < 0 ) sy += ((-n+1)*incy + 1);

for( i=0; i<n; i++,sx+=incx,sy+=incy )

*sy += ssa*(*sx);

}

void saxpyx( int n, double sa, double *sx,int incx,double * sy,int incy )

/*

PURPOSE

Vector times a scalar plus a vector. sx = sy + sa*sx.

INPUT

n Number of elements to multiply.

sa Scalar to multiply by (note that this is a double).

sx Pointer to double vector to scale.

incx Storage incrament for sx.

sy Pointer to double vector to add.

incy Storage incrament for sy.

OUTPUT

sx sx = sy + sa*sx

*/

{

register i;

register double ssa = (double)sa;

if( n<=0 || ssa==0.0 ) return;

if( incx == incy ) {

if( incx == 1 ) {

/* Both increments = 1 */

for( i=0; i<n; i++ )

sxreturn;

}

if( incx>0 ) {

/* Equal, positive, non-unit increments. */

for( i=0; i<n; i++,sx+=incx,sy+=incx )

*sy += ssa*(*sx);

return;

}

}

/* Unequal or negative increments. */

if( incx < 0 ) sx += ((-n+1)*incx + 1);

if( incy < 0 ) sy += ((-n+1)*incy + 1);

for( i=0; i<n; i++,sx+=incx,sy+=incy )

*sy += ssa*(*sx);

}

void saxpyx( int n, double sa, double *sx,int incx,double * sy,int incy )

/*

PURPOSE

Vector times a scalar plus a vector. sx = sy + sa*sx.

INPUT

n Number of elements to multiply.

sa Scalar to multiply by (note that this is a double).

sx Pointer to double vector to scale.

incx Storage incrament for sx.

sy Pointer to double vector to add.

incy Storage incrament for sy.

OUTPUT

sx sx = sy + sa*sx

*/

{

register i;

register double ssa = (double)sa;

if( n<=0 || ssa==0.0 ) return;

if( incx == incy ) {

if( incx == 1 ) {

/* Both increments = 1 */

for( i=0; i<n; i++ )

sx

*= sy**+ ssa*sx**;*

return;

}

if( incx>0 ) {

/* Equal, positive, non-unit increments. */

for( i=0; i<n; i++, sx+=incx, sy+=incx)

*sx = *sy + ssa*(*sx);

return;

}

}

/* Unequal or negative increments. */

if( incx < 0 ) sx += ((-n+1)*incx + 1);

if( incy < 0 ) sy += ((-n+1)*incy + 1);

for( i=0; i<n; i++,sx+=incx,sy+=incy )

*sx = *sy + ssa*(*sx);

}

void scopy(int n,double * sx,int incx, double *sy,int incy )

/*

PURPOSE

Copies vector sx into vector sy.

INPUT

n Number of components to copy.

sx Source vector

incx Index increment for sx.

incy Index increment for sy.

OUTPUT

sy Destination vector.

*/

{

register int i;

if( n<1 ) return;

if( incx == incy ) {

if( incx == 1 ) {

/* Both increments = 1 */

for( i=0; i<n; i++ )

syreturn;

}

if( incx>0 ) {

/* Equal, positive, non-unit increments. */

for( i=0; i<n; i++, sx+=incx, sy+=incx)

*sx = *sy + ssa*(*sx);

return;

}

}

/* Unequal or negative increments. */

if( incx < 0 ) sx += ((-n+1)*incx + 1);

if( incy < 0 ) sy += ((-n+1)*incy + 1);

for( i=0; i<n; i++,sx+=incx,sy+=incy )

*sx = *sy + ssa*(*sx);

}

void scopy(int n,double * sx,int incx, double *sy,int incy )

/*

PURPOSE

Copies vector sx into vector sy.

INPUT

n Number of components to copy.

sx Source vector

incx Index increment for sx.

incy Index increment for sy.

OUTPUT

sy Destination vector.

*/

{

register int i;

if( n<1 ) return;

if( incx == incy ) {

if( incx == 1 ) {

/* Both increments = 1 */

for( i=0; i<n; i++ )

sy

*= sx**;*

return;

}

if( incx > 0 ) {

/* Equal, positive, non-unit increments. */

for( i=0; i<n; i++, sx+=incx, sy+=incx)

*sy = *sx;

return;

}

}

/* Non-equal or negative increments. */

if( incx < 0 ) sx += ((-n+1)*incx + 1);

if( incy < 0 ) sy += ((-n+1)*incy + 1);

for( i=0; i<n; i++,sx+=incx,sy+=incy )

(*sx) = (*sy);

return;

}

double sdot(int n,double * sx,int incx,double * sy, int incy )

/*

PURPOSE

Forms the dot product of a vector.

INPUT

n Number of elements to sum.

sx Address of first element of x vector.

incx Incrament for the x vector.

sy Address of first element of y vector.

incy incrament for the y vector.

OUPUT

sdot Dot product x and y. Double returned

due to `C' language features.

*/

{

register int i;

register double stemp = 0.0e0;

if( n<1 ) return( (double)stemp );

if( incx == incy ) {

if( incx == 1 ) {

/* Both increments = 1 */

for( i=0; i<n; i++ )

stemp += sxreturn;

}

if( incx > 0 ) {

/* Equal, positive, non-unit increments. */

for( i=0; i<n; i++, sx+=incx, sy+=incx)

*sy = *sx;

return;

}

}

/* Non-equal or negative increments. */

if( incx < 0 ) sx += ((-n+1)*incx + 1);

if( incy < 0 ) sy += ((-n+1)*incy + 1);

for( i=0; i<n; i++,sx+=incx,sy+=incy )

(*sx) = (*sy);

return;

}

double sdot(int n,double * sx,int incx,double * sy, int incy )

/*

PURPOSE

Forms the dot product of a vector.

INPUT

n Number of elements to sum.

sx Address of first element of x vector.

incx Incrament for the x vector.

sy Address of first element of y vector.

incy incrament for the y vector.

OUPUT

sdot Dot product x and y. Double returned

due to `C' language features.

*/

{

register int i;

register double stemp = 0.0e0;

if( n<1 ) return( (double)stemp );

if( incx == incy ) {

if( incx == 1 ) {

/* Both increments = 1 */

for( i=0; i<n; i++ )

stemp += sx

**sy**;*

return( (double)stemp );

}

if( incx>0 ) {

/* Equal, positive, non-unit increments. */

for( i=0; i<n; i++, sx+=incx, sy+=incx)

stemp += (*sx)*(*sy);

return( (double)stemp );

}

}

/* Unequal or negative increments. */

if( incx < 0 ) sx += ((-n+1)*incx + 1);

if( incy < 0 ) sy += ((-n+1)*incy + 1);

for( i=0; i<n; i++,sx+=incx,sy+=incy )

stemp += (*sx)*(*sy);

return( (double)stemp );

} /* End of ---SDOT--- */

double snrm2( int n,double * sx,int incx )

/*

PURPOSE

Computes the Euclidean norm of sx while being

very careful of distructive underflow and overflow.

INPUT

n Number of elements to use.

sx Address of first element of x vector.

incx Incrament for the x vector (>0).

OUPUT

snrm2 Euclidean norm of sx. Returns double

due to `C' language features.

REMARKS

This algorithm proceeds in four steps.

1) scan zero components.

2) do phase 2 when component is near underflow.

*/

{

register int i;

static double cutlo, cuthi;

double sum = 0.0e0, hitst, r1mach();

double xmax;

if( n<1 || incx<1 ) return( sum );

/* Calculate near underflow */

if( cutlo == 0.0 ) cutlo = sqrt( SMALLsp/r1mach() );

/* Calculate near overflow */

if( cuthi == 0.0 ) cuthi = sqrt( HUGEsp );

hitst = cuthi/(double) n;

i = 0;

/* Zero Sum. */

while( *sx == 0.0 && i<n ) {

i++;

sx += incx;

}

if( i>=n ) return( sum );

START:

if( abs( *sx ) > cutlo ) {

for( ; i<n; i++, sx+=incx ) { /* Loop over elements. */

if( abs( *sx ) > hitst ) goto GOT_LARGE;

sum += (*sx) * (*sx);

}

sum = sqrt( sum );

return( sum ); /* Sum completed

normaly. */

}

else { /* Small sum prepare

for phase 2. */

xmax = abs( *sx );

sx += incx;

i++;

sum += 1.0;

for( ; i<n; i++, sx+=incx ) {

if( abs( *sx ) > cutlo ) { /* Got normal elem.

Rescale and process. */

sum = (sum*xmax)*xmax;

goto START;

}

if( abs( *sx ) > xmax ) {

sum = 1.0 + sum*(xmax /(*sx))*(xmax /(*sx));

xmax = abs( *sx );

continue;

}

sum += ((*sx)/xmax)*((*sx)/xmax);

}

return( (double)xmax*sqrt( sum ) );

} /* End of small sum. */

GOT_LARGE:

sum = 1.0 + (sum/(*sx))/(*sx); /* Rescale and process. */

xmax = abs( *sx );

sx += incx;

i++;

for( ; i<n; i++, sx+=incx ) {

if( abs( *sx ) > xmax ) {

sum = 1.0 + sum*(xmax /(*sx))*(xmax /(*sx));

xmax = abs( *sx );

continue;

}

sum += ((*sx)/xmax)*((*sx)/xmax);

}

return( (double)xmax*sqrt( sum ) ); /* End of small sum. */

} /* End of ---SDOT--- */

double r1mach()

/***********************************************************************

**** This routine computes the unit roundoff for single precision

**** of the machine. This is defined as the smallest positive

**** machine number u such that 1.0 + u .ne. 1.0

**** Returns a double due to `C' language features.

**********************************************************************/

{

double u = 1.0e0, comp;

do {

u *= 0.5e0;

comp = 1.0e0 + u;

}

while( comp != 1.0e0 );

return( (double)u*2.0e0 );

} /*-------------------- end of function r1mach ------------------------*/

int min0(int n,int a,int b,int c,int d,int e,int f,int g,int h,int i,int

j,int k,int l,int m,int o,int p )

/*

PURPOSE

Determine the minimum of the arguments a-p.

INPUT

n Number of arguments to check 1 <= n <= 15.

a-p Integer arguments of which the minumum is desired.

RETURNS

min0 Minimum of a thru p.

*/

{

int mt;

if( n < 1 || n > 15 ) return( -1 );

mt = a;

if( n == 1 ) return( mt );

if( mt > b ) mt = b;

if( n == 2 ) return( mt );

if( mt > c ) mt = c;

if( n == 3 ) return( mt );

if( mt > d ) mt = d;

if( n == 4 ) return( mt );

if( mt > e ) mt = e;

if( n == 5 ) return( mt );

if( mt > f ) mt = f;

if( n == 6 ) return( mt );

if( mt > g ) mt = g;

if( n == 7 ) return( mt );

if( mt > h ) mt = h;

if( n == 8 ) return( mt );

if( mt > i ) mt = i;

if( n == 9 ) return( mt );

if( mt > j ) mt = j;

if( n == 10 ) return( mt );

if( mt > k ) mt = k;

if( n == 11 ) return( mt );

if( mt > l ) mt = l;

if( n == 12 ) return( mt );

if( mt > m ) mt = m;

if( n == 13 ) return( mt );

if( mt > o ) mt = o;

if( n == 14 ) return( mt );

if( mt > p ) mt = p;

return( mt );

}

void sscal(int n,double sa,double * sx,int incx )

/*

PURPOSE

Scales a vector by a constant.

INPUT

n Number of components to scale.

sa Scale value (note that this is a double).

sx Vector to scale.

incx Every incx-th element of sx will be scaled.

OUTPUT

sx Scaled vector.

*/

{

register double ssa = (double)sa;

int i;

#ifdef alliant

void sscal_();

double ssaa = (double)sa;

sscal_( &n, &ssaa, sx, &incx );

return;

#else

if( n < 1 ) return;

/* Code for increment not equal to 1.*/

if( incx != 1 ) {

if( incx < 0 ) sx += (-n+1)*incx;

for( i=0; i<n; i++, sx+=incx )

*sx *= ssa;

return;

}

/* Code for unit increment. */

for( i=0; i<n; i++ )

sxreturn( (double)stemp );

}

if( incx>0 ) {

/* Equal, positive, non-unit increments. */

for( i=0; i<n; i++, sx+=incx, sy+=incx)

stemp += (*sx)*(*sy);

return( (double)stemp );

}

}

/* Unequal or negative increments. */

if( incx < 0 ) sx += ((-n+1)*incx + 1);

if( incy < 0 ) sy += ((-n+1)*incy + 1);

for( i=0; i<n; i++,sx+=incx,sy+=incy )

stemp += (*sx)*(*sy);

return( (double)stemp );

} /* End of ---SDOT--- */

double snrm2( int n,double * sx,int incx )

/*

PURPOSE

Computes the Euclidean norm of sx while being

very careful of distructive underflow and overflow.

INPUT

n Number of elements to use.

sx Address of first element of x vector.

incx Incrament for the x vector (>0).

OUPUT

snrm2 Euclidean norm of sx. Returns double

due to `C' language features.

REMARKS

This algorithm proceeds in four steps.

1) scan zero components.

2) do phase 2 when component is near underflow.

*/

{

register int i;

static double cutlo, cuthi;

double sum = 0.0e0, hitst, r1mach();

double xmax;

if( n<1 || incx<1 ) return( sum );

/* Calculate near underflow */

if( cutlo == 0.0 ) cutlo = sqrt( SMALLsp/r1mach() );

/* Calculate near overflow */

if( cuthi == 0.0 ) cuthi = sqrt( HUGEsp );

hitst = cuthi/(double) n;

i = 0;

/* Zero Sum. */

while( *sx == 0.0 && i<n ) {

i++;

sx += incx;

}

if( i>=n ) return( sum );

START:

if( abs( *sx ) > cutlo ) {

for( ; i<n; i++, sx+=incx ) { /* Loop over elements. */

if( abs( *sx ) > hitst ) goto GOT_LARGE;

sum += (*sx) * (*sx);

}

sum = sqrt( sum );

return( sum ); /* Sum completed

normaly. */

}

else { /* Small sum prepare

for phase 2. */

xmax = abs( *sx );

sx += incx;

i++;

sum += 1.0;

for( ; i<n; i++, sx+=incx ) {

if( abs( *sx ) > cutlo ) { /* Got normal elem.

Rescale and process. */

sum = (sum*xmax)*xmax;

goto START;

}

if( abs( *sx ) > xmax ) {

sum = 1.0 + sum*(xmax /(*sx))*(xmax /(*sx));

xmax = abs( *sx );

continue;

}

sum += ((*sx)/xmax)*((*sx)/xmax);

}

return( (double)xmax*sqrt( sum ) );

} /* End of small sum. */

GOT_LARGE:

sum = 1.0 + (sum/(*sx))/(*sx); /* Rescale and process. */

xmax = abs( *sx );

sx += incx;

i++;

for( ; i<n; i++, sx+=incx ) {

if( abs( *sx ) > xmax ) {

sum = 1.0 + sum*(xmax /(*sx))*(xmax /(*sx));

xmax = abs( *sx );

continue;

}

sum += ((*sx)/xmax)*((*sx)/xmax);

}

return( (double)xmax*sqrt( sum ) ); /* End of small sum. */

} /* End of ---SDOT--- */

double r1mach()

/***********************************************************************

**** This routine computes the unit roundoff for single precision

**** of the machine. This is defined as the smallest positive

**** machine number u such that 1.0 + u .ne. 1.0

**** Returns a double due to `C' language features.

**********************************************************************/

{

double u = 1.0e0, comp;

do {

u *= 0.5e0;

comp = 1.0e0 + u;

}

while( comp != 1.0e0 );

return( (double)u*2.0e0 );

} /*-------------------- end of function r1mach ------------------------*/

int min0(int n,int a,int b,int c,int d,int e,int f,int g,int h,int i,int

j,int k,int l,int m,int o,int p )

/*

PURPOSE

Determine the minimum of the arguments a-p.

INPUT

n Number of arguments to check 1 <= n <= 15.

a-p Integer arguments of which the minumum is desired.

RETURNS

min0 Minimum of a thru p.

*/

{

int mt;

if( n < 1 || n > 15 ) return( -1 );

mt = a;

if( n == 1 ) return( mt );

if( mt > b ) mt = b;

if( n == 2 ) return( mt );

if( mt > c ) mt = c;

if( n == 3 ) return( mt );

if( mt > d ) mt = d;

if( n == 4 ) return( mt );

if( mt > e ) mt = e;

if( n == 5 ) return( mt );

if( mt > f ) mt = f;

if( n == 6 ) return( mt );

if( mt > g ) mt = g;

if( n == 7 ) return( mt );

if( mt > h ) mt = h;

if( n == 8 ) return( mt );

if( mt > i ) mt = i;

if( n == 9 ) return( mt );

if( mt > j ) mt = j;

if( n == 10 ) return( mt );

if( mt > k ) mt = k;

if( n == 11 ) return( mt );

if( mt > l ) mt = l;

if( n == 12 ) return( mt );

if( mt > m ) mt = m;

if( n == 13 ) return( mt );

if( mt > o ) mt = o;

if( n == 14 ) return( mt );

if( mt > p ) mt = p;

return( mt );

}

void sscal(int n,double sa,double * sx,int incx )

/*

PURPOSE

Scales a vector by a constant.

INPUT

n Number of components to scale.

sa Scale value (note that this is a double).

sx Vector to scale.

incx Every incx-th element of sx will be scaled.

OUTPUT

sx Scaled vector.

*/

{

register double ssa = (double)sa;

int i;

#ifdef alliant

void sscal_();

double ssaa = (double)sa;

sscal_( &n, &ssaa, sx, &incx );

return;

#else

if( n < 1 ) return;

/* Code for increment not equal to 1.*/

if( incx != 1 ) {

if( incx < 0 ) sx += (-n+1)*incx;

for( i=0; i<n; i++, sx+=incx )

*sx *= ssa;

return;

}

/* Code for unit increment. */

for( i=0; i<n; i++ )

sx

**= ssa;*

return;

#endif

}

void vexopy(int n,double * v,double * x,double * y,int itype )

/*

Purpose:

To operate on the vectors x and y.

Input:

n Number of elements to scale.

x First operand vector.

y Second operand vector.

itype Type of operation to perform:

itype = 1 => '+'

itype = 2 => '-'

Output:

v Result vector of x op y.

*/

{

register int i;

if( n<1 ) return;

if( itype == 1 ) /* ADDITION. */

for( i=0; i<n; i++ )

vreturn;

#endif

}

void vexopy(int n,double * v,double * x,double * y,int itype )

/*

Purpose:

To operate on the vectors x and y.

Input:

n Number of elements to scale.

x First operand vector.

y Second operand vector.

itype Type of operation to perform:

itype = 1 => '+'

itype = 2 => '-'

Output:

v Result vector of x op y.

*/

{

register int i;

if( n<1 ) return;

if( itype == 1 ) /* ADDITION. */

for( i=0; i<n; i++ )

v

*= x**+ y**;*

else /* SUBTRACTION. */

for( i=0; i<n; i++ )

velse /* SUBTRACTION. */

for( i=0; i<n; i++ )

v

*= x**- y**;*

}

void vfill(int n,double * v,double val )

/*

Purpose

To fill the FLOAT vector v with the value val.

Make sure that if you pass a value for val that you cast

it to double, viz.,

vfill( 100, vector, (double)0.0 );

*/

{

register int i;

register double vval = (double)val;

if( n<1 ) return;

for( i=0; i<n; i++ )

v}

void vfill(int n,double * v,double val )

/*

Purpose

To fill the FLOAT vector v with the value val.

Make sure that if you pass a value for val that you cast

it to double, viz.,

vfill( 100, vector, (double)0.0 );

*/

{

register int i;

register double vval = (double)val;

if( n<1 ) return;

for( i=0; i<n; i++ )

v

*= vval;*

}

#if 1

double copysign(double x,double y )

/*

PURPOSE

This routine is recomended by the IEEE standard 754. Most C

3M libraries contain this function. If yours does not then

compile this file with the -DCOPYSIGN option and the following

code will be generated. This routine copies the sign from y

onto x.

INPUT

x Thing to have it's sign changed.

y Variable to supply the sign.

RETURNS

copysign Returns x with the sign of y (the fortran intrinsic

sign(x,y)).

*/

{

/* Registers are used for reduced memory traffic. */

register double xx = x;

register double yy = y;

if( xx*yy < 0.0 ) return( -xx );

else return( xx );

}

#endif}

#if 1

double copysign(double x,double y )

/*

PURPOSE

This routine is recomended by the IEEE standard 754. Most C

3M libraries contain this function. If yours does not then

compile this file with the -DCOPYSIGN option and the following

code will be generated. This routine copies the sign from y

onto x.

INPUT

x Thing to have it's sign changed.

y Variable to supply the sign.

RETURNS

copysign Returns x with the sign of y (the fortran intrinsic

sign(x,y)).

*/

{

/* Registers are used for reduced memory traffic. */

register double xx = x;

register double yy = y;

if( xx*yy < 0.0 ) return( -xx );

else return( xx );

}

#endif