plj = j * n + l;
plj = j*n+lp1; /* x(lp1,j) */
t2.re = -(e+j)->re;
plj = j*p+lp1; /* v(lp1,j) */
*tp1++ = Zero;
temp.re = CMULT_RE(r,*tp1);
tp1->re = -tp1->re;
temp = *tp1;
pxlj = x + plj; /* x(l,j) */
if (l < nct && %<CQABS>(*psl) != %<realZero>) {
/*
* Apply the transformation.
*/
/* t = -dot(nml,pxll,pxlj) */
tp1 = pxll;
tp2 = pxlj;
t = Zero;
for(ii=0; ii
t.im += tp1->re * tp2->im - tp1->im * tp2->re;
tp1++;
tp2++;
}
t.re = -t.re;
t.im = -t.im;
/* t = -dot(nml,pxll,pxlj) */
%<CDIV>(t, *pxll, t);
/* axpy(nml,t,pxll,pxlj) */
tp1 = pxlj;
tp2 = pxll;
for(ii=0; ii
temp.im = CMULT_IM(t,*tp2);
tp1->re += temp.re;
tp1->im += temp.im;
tp1++;
tp2++;
}
/* axpy(nml,t,pxll,pxlj) */
}
/*
* Place the l-th row of x into e for the
* subsequent calculation of the row transformation.
*/
CCONJ(*pxlj,*(e+j));
}
if (wantv && l < nct) {
/*
* Place the transformation in u for
* subsequent back multiplication.
*/
tp1=u+pll;
tp2=pxll;
for (ii=0; ii
}
}
if (l < nrt) {
/*
* Compute the l-th row transformation and place
* the l-th super-diagonal in e(l).
*/
/* psl->re = nrm2(p-lp1,pel1) */
pel->re = %<realZero>;
tp1 = pel1;
for(ii=0; ii
%<CHYPOT>(pel->re,tp1->im,pel->re);
tp1++;
}
/* psl->re = nrm2(p-lp1,pel1) */
pel->im = %<realZero>;
if (%<CQABS>(*pel) != %<realZero>) {
if (%<CQABS>(*pel1) != %<realZero>) {
/* *pel = sign(*pel,*pel1) */
%<CHYPOT>(pel1->re, pel1->im, rt1);
if (rt1 == 0) {
%<CHYPOT>(pel->re, pel->im, pel->re);
pel->im = %<realZero>;
} else {
%<CHYPOT>(pel->re, pel->im, rt2);
rt2 /= rt1;
pel->re = rt2 * pel1->re;
pel->im = rt2 * pel1->im;
}
/* *pel = sign(*pel,*pel1) */
}
/* scal(p-lp1,One / *pel,pel1) */
%<CDIV>(One, *pel, temp1);
tp1 = pel1;
for(ii=0; ii
temp.im = CMULT_IM(temp1, *tp1);
*tp1++ = temp;
}
/* scal(p-lp1,One / *pel,pel1) */
(pel+1)->re += %<realOne>;
}
pel->re = -pel->re;
if (lp1 < n && %<CQABS>(*pel) != %<realZero>) {
/*
* Apply the transformation.
*/
tp1 = work+lp1;
for(ii=0; ii
}
for (j=lp1; j
/* axpy(n-lp1,*(e+j),x+plj,work+lp1) */
tp1 = work+lp1;
tp2 = x+plj;
for(ii=0; ii
temp.im = CMULT_IM(*(e+j),*tp2);
tp1->re += temp.re;
tp1->im += temp.im;
tp1++;
tp2++;
}
/* axpy(n-lp1,*(e+j),x+plj,work+lp1) */
}
for (j=lp1; j
t2.im = -(e+j)->im;
%<CDIV>(t2, *pel1,t);
t.im = -t.im;
plj = j*n+lp1; /* x(lp1,j) */
/* axpy(n-lp1,t,work+lp1,x+plj) */
tp1 = x+plj;
tp2 = work+lp1;
for(ii=0; ii
temp.im = CMULT_IM(t,*tp2);
tp1->re += temp.re;
tp1->im += temp.im;
tp1++;
tp2++;
}
/* axpy(n-lp1,t,work+lp1,x+plj) */
}
}
if (wantv) {
/*
* Place the transformation in v for
* subsequent back multiplication.
*/
pll = lp1 + l * p; /* pointer to v(lp1,l) */
tp1 = pel1;
tp2 = v+pll;
for(ii=0; ii
}
}
}
}
/*
*-------------------------------------------------------------
* set up the final bidiagonal matrix or order m.
*/
mm1 = m = MIN(p, np1);
mm1--;
if (nct < p) {
pil = nct * np1; /* pointer to x(nct,nct) */
*(s+nct) = *(x+pil);
}
if (n < m) {
*(s+mm1) = Zero;
}
if (nrt < mm1) {
pil = nrt + n * mm1; /* pointer to x(nrt,m) */
*(e+nrt) = *(x+pil);
}
*(e+mm1) = Zero;
/*
*-------------------------------------------------------------
* if required, generate u.
*/
if (wantv) {
for (j=nct; j
for(ii=0; ii
}
*(u+j*np1) = One;
}
for (l=nct-1; l>=0; l--) {
nml = n - l;
pll = l * np1;
pull = u + pll; /* u(l,l) */
if (%<CQABS>(*(s+l)) != %<realZero>) {
lp1 = l + 1;
for (j=lp1; j
/* t = -dot(nml,pull,u+plj) */
tp1 = pull;
tp2 = u+plj;
t = Zero;
for(ii=0; ii
t.im += tp1->re * tp2->im - tp1->im * tp2->re;
tp1++;
tp2++;
}
t.re = -t.re;
t.im = -t.im;
/* t = -dot(nml,pull,u+plj) */
%<CDIV>(t, *pull, t);
/* axpy(nml,t,pull,u+plj) */
tp1 = u+plj;
tp2 = pull;
for(ii=0; ii
temp.im = CMULT_IM(t,*tp2);
tp1->re += temp.re;
tp1->im += temp.im;
tp1++;
tp2++;
}
/* axpy(nml,t,pull,u+plj) */
}
/* scal(nml,MinusOne,pull) */
tp1 = pull;
for(ii=0; ii
tp1->im = -tp1->im;
tp1++;
}
/* scal(nml,MinusOne,pull) */
pull->re += %<realOne>;
if (l >= 1) {
tp1 = pull-l;
for(ii=0; ii
}
}
} else {
tp1 = pull-l;
for(ii=0; ii
}
*pull = One;
}
}
}
/*
*-------------------------------------------------------------
* If it is required, generate v.
*/
if (wantv) {
for (l=p-1; l>=0; l--) {
lp1 = l + 1;
pll = l*p+lp1;
pvll = v + pll; /* v(lp1,l) */
if (l < nrt && %<CQABS>(*(e+l)) != %<realZero>) {
for (j=lp1; j
/* t = -dot(p-lp1,pvll,v+plj) */
tp1 = pvll;
tp2 = v+plj;
t = Zero;
for(ii=0; ii
t.im += tp1->re * tp2->im - tp1->im * tp2->re;
tp1++;
tp2++;
}
t.re = -t.re;
t.im = -t.im;
/* t = -dot(p-lp1,pvll,v+plj) */
%<CDIV>(t, *pvll, t);
/* axpy(p-lp1,t,pvll,v+plj) */
tp1 = v+plj;
tp2 = pvll;
for(ii=0; ii
temp.im = CMULT_IM(t,*tp2);
tp1->re += temp.re;
tp1->im += temp.im;
tp1++;
tp2++;
}
/* axpy(p-lp1,t,pvll,v+plj) */
}
}
tp1 = pvll-lp1;
for(ii=0; ii
}
*(pvll-1) = One; /* v(l,l) */
}
}
/*
*-------------------------------------------------------------
* Transform s and e so that they are real.
*/
for (l=0; l
psl = s + l; /* pointer to s(l) */
pel = e + l; /* pointer to e(l) */
%<CHYPOT>(psl->re, psl->im, t.re);
if (t.re != %<realZero>) {
r.re = psl->re / t.re;
r.im = psl->im / t.re;
psl->re = t.re;
psl->im = %<realZero>;
if (lp1 < m) {
%<CDIV>(*pel, r, *pel);
}
if (wantv && l < n) {
/* scal(n,r,u+l*n) */
tp1 = u+l*n;
for(ii=0; ii
temp.im = CMULT_IM(r,*tp1);
*tp1++ = temp;
}
/* scal(n,r,u+l*n) */
}
}
if (lp1 == m) break; /* ...exit */
%<CHYPOT>(pel->re, pel->im, t.re);
if (t.re != %<realZero>) {
temp.re = t.re;
temp.im = %<realZero>;
%<CDIV>(temp, *pel, r);
pel->re = t.re;
pel->im = %<realZero>;
psl++; /* s(l+1) */
temp.re = CMULT_RE(*psl,r);
temp.im = CMULT_IM(*psl,r);
*psl = temp;
if (wantv) {
/* scal(p,r,v+p*lp1) */
tp1 = v+p*lp1;
for(ii=0; ii
temp.im = CMULT_IM(r,*tp1);
*tp1++ = temp;
}
/* scal(p,r,v+p*lp1) */
}
}
}
/*
*-------------------------------------------------------------
* Main iteration loop for the singular values.
*/
mm = m;
iter = 0;
snorm = 0;
for (l=0; l
}
/*
* Quit if all the singular values have been found, or
* if too many iterations have been performed, set
* flag and return.
*/
while (m != 0 && iter <= %<MAXIT>) {
/*
* This section of the program inspects for
* negligible elements in the s and e arrays. On
* completion the variable kase is set as follows.
*
* kase = 1 if sr(m) and er(l-1) are negligible
* and l < m
* kase = 2 if sr(l) is negligible and l < m
* kase = 3 if er(l-1) is negligible, l < m, and
* sr(l), ..., sr(m) are not
* negligible (qr step).
* kase = 4 if er(m-1) is negligible (convergence).
*/
mm1 = m - 1;
mm2 = m - 2;
for (l=mm2; l>=0; l--) {
test = %<fabs>((s+l)->re) + %<fabs>((s+l+1)->re);
ztest = %<fabs>((e+l)->re);
if (!%<dspIsFinite">dspIsFinite(dTypeIdentifier, "test")> || !%<dspIsFinite">dspIsFinite(dTypeIdentifier, "ztest")>) {
info = -1;
return(info);
}
if ((ztest <= eps*test) || (ztest <= tiny) ||
(iter > 20 && ztest <= eps*snorm)) {
(e+l)->re = %<realZero>;
break; /* ...exit */
}
}
if (l == mm2) {
kase = 4;
} else {
lp1 = l + 1;
for (ls=m; ls>lp1; ls--) {
test = %<realZero>;
if (ls != m) test += %<fabs>((e+ls-1)->re);
if (ls != l + 2) test += %<fabs>((e+ls-2)->re);
ztest = %<fabs>((s+ls-1)->re);
if (!%<dspIsFinite">dspIsFinite(dTypeIdentifier, "test")> || !%<dspIsFinite">dspIsFinite(dTypeIdentifier, "ztest")>) {
return(info);
}
if ((ztest <= eps*test) || (ztest <= tiny)) {
(s+ls-1)->re = %<realZero>;
break; /* ...exit */
}
}
if (ls == lp1) {
kase = 3;
} else if (ls == m) {
kase = 1;
} else {
kase = 2;
l = ls - 1;
}
}
lm1 = ++l - 1;
/*
* Perform the task indicated by kase.
*/
switch (kase) {
case 1: /* Deflate negligible sr(m). */
f = (e+mm2)->re;
(e+mm2)->re = %<realZero>;
for (k=mm2; k>=l; k--) {
t1 = (s+k)->re;
%<dspGivensRot(dTypeIdentifier, "&t1", "&f", "&cs", "&sn")>;
(s+k)->re = t1;
if (k != l) {
f = -sn * (e+k-1)->re;
(e+k-1)->re *= cs;
}
if (wantv) {
%<rot_cplx(dTypeIdentifier, "p", "cs", "sn", "v+k*p", "v+mm1*p")>;
}
}
break;
case 2: /* split at negligible sr(l). */
f = (e+lm1)->re;
(e+lm1)->re = %<realZero>;
for (k=l; k
%<dspGivensRot(dTypeIdentifier, "&t1", "&f", "&cs", "&sn")>;
(s+k)->re = t1;
f = -sn * (e+k)->re;
(e+k)->re *= cs;
if (wantv) {
%<rot_cplx(dTypeIdentifier, "n", "cs", "sn", "u+n*k", "u+n*lm1")>;
}
}
break;
case 3: /* perform one qr step. */
/*
* Calculate the shift.
*/
scale = MAX">MAX(%<fabs>((s+mm1)->re), MAX">MAX(%<fabs>((s+mm2)->re), %<fabs>((e+mm2)->re)));
scale = MAX">MAX(%<fabs>(scale), MAX">MAX(%<fabs>((s+l)->re), %<fabs>((e+l)->re)));
sm = (s+mm1)->re / scale;
smm1 = (s+mm2)->re / scale;
emm1 = (e+mm2)->re / scale;
sl = (s+l)->re / scale;
el = (e+l)->re / scale;
b = ((smm1 + sm) * (smm1 - sm) + emm1 * emm1) / %<realTwo>;
c = sm * emm1;
c *= c;
shift = %<realZero>;
if (b != %<realZero> || c != %<realZero>) {
shift = %<sqrt>(b * b + c);
if (b < %<realZero>) shift = -shift;
shift = c / (b + shift);
}
f = (sl + sm) * (sl - sm) + shift;
g = sl * el;
/*
* Chase Zeros.
*/
for (k=l; k
%<dspGivensRot(dTypeIdentifier, "&f", "&g", "&cs", "&sn")>;
if (k != l) (e+k-1)->re = f;
f = cs * (s+k)->re + sn * (e+k)->re;
(e+k)->re = cs * (e+k)->re - sn * (s+k)->re;
g = sn * (s+kp1)->re;
(s+kp1)->re *= cs;
if (wantv) {
%<rot_cplx(dTypeIdentifier, "p", "cs", "sn", "v+k*p", "v+kp1*p")>;
}
%<dspGivensRot(dTypeIdentifier, "&f", "&g", "&cs", "&sn")>;
(s+k)->re = f;
f = cs * (e+k)->re + sn * (s+kp1)->re;
(s+kp1)->re = -sn * (e+k)->re + cs * (s+kp1)->re;
g = sn * (e+kp1)->re;
(e+kp1)->re *= cs;
if (wantv && k < nm1) {
%<rot_cplx(dTypeIdentifier, "n", "cs", "sn", "u+n*k", "u+n*kp1")>;
}
}
(e+mm2)->re = f;
++iter;
break;
case 4: /* convergence */
/*
* Make the singular value positive
*/
if ((s+l)->re < %<realZero>) {
(s+l)->re = -(s+l)->re;
if (wantv) {
/* scal(p,MinusOne,v+l*p) */
tp1 = v+l*p;
for(ii=0; ii
tp1->im = -tp1->im;
tp1++;
}
/* scal(p,MinusOne,v+l*p) */
}
}
/*
* Order the singular value.
*/
while (l != mm-1 && (s+l)->re < (s+l+1)->re) {
lp1 = l + 1;
t.re = (s+l)->re;
(s+l)->re = (s+lp1)->re;
(s+lp1)->re = t.re;
if (wantv && lp1 < p) {
/* swap(p,v+l*p,v+lp1*p) */
tp1 = v+l*p;
tp2 = v+lp1*p;
for(ii=0; ii
*tp1++ = *tp2;
*tp2++ = temp;
}
/* swap(p,v+l*p,v+lp1*p) */
}
if (wantv && lp1 < n) {
/* swap(n,u+l*n,u+lp1*n) */
tp1 = u+l*n;
tp2 = u+lp1*n;
for(ii=0; ii
*tp1++ = *tp2;
*tp2++ = temp;
}
/* swap(n,u+l*n,u+lp1*n) */
}
++l;
}
iter = 0;
m--;
break;
default:
break;
}
info = m;
}
return(info);
}
/* [EOF] dspsvdcplxalgo.tlc */