core routines

SYNOPSIS

#include "machine.h"
/* or #include "matrix.h" */
void   __add__ (Real dp1[], Real dp2[], Real out[], int len);
double __ip__  (Real dp1[], Real dp2[], int len);
void   __mltadd__(Real dp1[], Real dp2[], double s, int len);
void   __smlt__(Real dp[],  double s,   Real out[], int len);
void   __sub__ (Real dp1[], Real dp2[], Real out[], int len);
void   __zero__(Real dp[],  int len);
#include "zmatrix.h"
void    __zadd__ (complex z1[], complex z2[],
                  complex out[], int len);
void    __zconj__(complex z[],   int len);
complex __zip__  (complex z1[], complex z2[],
                                 int len, int conj);
void    __zmlt__ (complex z1[], complex s, complex z2[],
                                 int len);
void    __zmltadd__(complex z1[], complex z2[], complex s,
                                 int len, int conj);
void    __zsub__ (complex z1[], complex z2[], complex out[],
                                 int len);
void    __zzero__(complex z[],   int len);

DESCRIPTION

These routines are the underlying routines for almost all dense matrix routines. Unlike the other routines in this library they do not take pointers to structures as arguments. Instead they work directly with arrays of Real (or complex) values. It is intended that these routines should be fast. If you wish to take full advantage of a particular architecture, it is suggested that you modify these routines. The current implementation does not use any special techniques for boosting speed, such as loop unrolling or assembly code, in the interests of simplicity and portability. Note that zconj(z), referred to below, returns the complex conjugate of z. The routine __add__() sets out[i] = dp1[i]+dp2[i] for i ranging from zero to len-1. The routine __zadd__() sets out[i] = z1[i]+z2[i] for i ranging from zero to len-1. The routine __ip__() returns the sum of dp1[i]*dp2[i] for i ranging from zero to len-1. The routine __zip__() returns the sum of z1[i]*z2[i] for i ranging from zero to len-1 if conj is Z_NOCONJ, and returns the sum of zconj(z1[i])*z2[i] for i ranging from zero to len-1 if conj is Z_CONJ. The routine __mltadd__() sets dp1[i] = dp1[i]+s*dp2[i] for i ranging from zero to len-1. The routine __zmltadd__() sets z1[i] = z1[i]+s*z2[i] for i ranging from zero to len-1 if conj is Z_NOCONJ, and sets z1[i] = z1[i]+s*zconj(z2[i]) for i ranging from zero to len-1 if conj is Z_CONJ. The routine __smlt__() sets out[i] = s*dp[i] for i ranging from zero to len-1. The routine __zmlt__() sets z2[i] = s*z1[i] for i ranging from zero to len-1. The routine __sub__() sets out[i] = dp1[i]-dp2[i] for i ranging from zero to len-1. The routine __zsub__() sets out[i] = z1[i]-z2[i] for i ranging from zero to len-1. The routines __zero__() and __zzero__() set dp[i] = 0.0 and z[i] = 0.0, respectively, for i ranging from zero to len-1. These routines should be used instead of the macro MEM_ZERO() or the ANSI C routine memset() for portability, in case the floating point zero is not represented by a bit string of zeros.

EXAMPLE

MAT    *A, *B;
ZVEC   *x, *y;
Real    alpha;
  ......
/* set A = A + alpha.B */
for ( i = 0; i < m; i++ )
    __mltadd__(A->me[i],B->me[i],alpha,A->n);
/* zero row 3 of A */
__zero__(A->me[3],A->n);
/* quick complex inner product */
z_output(__zip__(x->ve,y->ve,x->dim,Z_CONJ));

SOURCE FILES: machine.c, zmachine.c