Actual source code: lmedense.c

slepc-3.18.3 2023-03-24
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Routines for solving dense matrix equations, in some cases calling SLICOT
 12: */

 14: #include <slepc/private/lmeimpl.h>
 15: #include <slepcblaslapack.h>

 17: /*
 18:    LMEDenseRankSVD - given a square matrix A, compute its SVD U*S*V', and determine the
 19:    numerical rank. On exit, U contains U*S and A is overwritten with V'
 20: */
 21: PetscErrorCode LMEDenseRankSVD(LME lme,PetscInt n,PetscScalar *A,PetscInt lda,PetscScalar *U,PetscInt ldu,PetscInt *rank)
 22: {
 23:   PetscInt       i,j,rk=0;
 24:   PetscScalar    *work;
 25:   PetscReal      tol,*sg,*rwork;
 26:   PetscBLASInt   n_,lda_,ldu_,info,lw_;

 28:   PetscCalloc3(n,&sg,10*n,&work,5*n,&rwork);
 29:   PetscBLASIntCast(n,&n_);
 30:   PetscBLASIntCast(lda,&lda_);
 31:   PetscBLASIntCast(ldu,&ldu_);
 32:   lw_ = 10*n_;
 33:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 34: #if !defined (PETSC_USE_COMPLEX)
 35:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,A,&lda_,sg,U,&ldu_,NULL,&n_,work,&lw_,&info));
 36: #else
 37:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,A,&lda_,sg,U,&ldu_,NULL,&n_,work,&lw_,rwork,&info));
 38: #endif
 39:   PetscFPTrapPop();
 40:   SlepcCheckLapackInfo("gesvd",info);
 41:   tol = 10*PETSC_MACHINE_EPSILON*n*sg[0];
 42:   for (j=0;j<n;j++) {
 43:     if (sg[j]>tol) {
 44:       for (i=0;i<n;i++) U[i+j*n] *= sg[j];
 45:       rk++;
 46:     } else break;
 47:   }
 48:   *rank = rk;
 49:   PetscFree3(sg,work,rwork);
 50:   return 0;
 51: }

 53: #if defined(PETSC_USE_INFO)
 54: /*
 55:    LyapunovCholResidual - compute the residual norm ||A*U'*U+U'*U*A'+B*B'||
 56: */
 57: static PetscErrorCode LyapunovCholResidual(PetscInt m,PetscScalar *A,PetscInt lda,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
 58: {
 59:   PetscBLASInt   n,kk,la,lb,lu;
 60:   PetscScalar    *M,*R,zero=0.0,done=1.0;

 62:   *res = 0;
 63:   PetscBLASIntCast(lda,&la);
 64:   PetscBLASIntCast(ldb,&lb);
 65:   PetscBLASIntCast(ldu,&lu);
 66:   PetscBLASIntCast(m,&n);
 67:   PetscBLASIntCast(k,&kk);
 68:   PetscMalloc2(m*m,&M,m*m,&R);

 70:   /* R = B*B' */
 71:   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&kk,&done,B,&lb,B,&lb,&zero,R,&n));
 72:   /* M = A*U' */
 73:   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,A,&la,U,&lu,&zero,M,&n));
 74:   /* R = R+M*U */
 75:   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,M,&n,U,&lu,&done,R,&n));
 76:   /* R = R+U'*M' */
 77:   PetscCallBLAS("BLASgemm",BLASgemm_("C","C",&n,&n,&n,&done,U,&lu,M,&n,&done,R,&n));

 79:   *res = LAPACKlange_("F",&n,&n,R,&n,NULL);
 80:   PetscFree2(M,R);
 81:   return 0;
 82: }

 84: /*
 85:    LyapunovResidual - compute the residual norm ||A*X+X*A'+B||
 86: */
 87: static PetscErrorCode LyapunovResidual(PetscInt m,PetscScalar *A,PetscInt lda,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx,PetscReal *res)
 88: {
 89:   PetscInt       i;
 90:   PetscBLASInt   n,la,lb,lx;
 91:   PetscScalar    *R,done=1.0;

 93:   *res = 0;
 94:   PetscBLASIntCast(lda,&la);
 95:   PetscBLASIntCast(ldb,&lb);
 96:   PetscBLASIntCast(ldx,&lx);
 97:   PetscBLASIntCast(m,&n);
 98:   PetscMalloc1(m*m,&R);

100:   /* R = B+A*X */
101:   for (i=0;i<m;i++) PetscArraycpy(R+i*m,B+i*ldb,m);
102:   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,A,&la,X,&lx,&done,R,&n));
103:   /* R = R+X*A' */
104:   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,X,&lx,A,&la,&done,R,&n));

106:   *res = LAPACKlange_("F",&n,&n,R,&n,NULL);
107:   PetscFree(R);
108:   return 0;
109: }
110: #endif

112: #if defined(SLEPC_HAVE_SLICOT)
113: /*
114:    HessLyapunovChol_SLICOT - implementation used when SLICOT is available
115: */
116: static PetscErrorCode HessLyapunovChol_SLICOT(PetscInt m,PetscScalar *H,PetscInt ldh,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
117: {
118:   PetscBLASInt   lwork,info,n,kk,lu,ione=1,sdim;
119:   PetscInt       i,j;
120:   PetscReal      scal;
121:   PetscScalar    *Q,*W,*wr,*wi,*work;

123:   PetscBLASIntCast(ldu,&lu);
124:   PetscBLASIntCast(m,&n);
125:   PetscBLASIntCast(k,&kk);
126:   PetscBLASIntCast(6*m,&lwork);
127:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);

129:   /* transpose W = H' */
130:   PetscMalloc5(m*m,&W,m*m,&Q,m,&wr,m,&wi,lwork,&work);
131:   for (j=0;j<m;j++) {
132:     for (i=0;i<m;i++) W[i+j*m] = H[j+i*ldh];
133:   }

135:   /* compute the real Schur form of W */
136:   PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,wi,Q,&n,work,&lwork,NULL,&info));
137:   SlepcCheckLapackInfo("gees",info);
138: #if defined(PETSC_USE_DEBUG)
140: #endif

142:   /* copy B' into first rows of U */
143:   for (i=0;i<k;i++) {
144:     for (j=0;j<m;j++) U[i+j*ldu] = B[j+i*ldb];
145:   }

147:   /* solve Lyapunov equation (Hammarling) */
148:   PetscCallBLAS("SLICOTsb03od",SLICOTsb03od_("C","F","N",&n,&kk,W,&n,Q,&n,U,&lu,&scal,wr,wi,work,&lwork,&info));

152:   /* resnorm = norm(H(m+1,:)*U'*U), use Q(:,1) = U'*U(:,m) */
153:   if (res) {
154:     for (j=0;j<m;j++) Q[j] = U[j+(m-1)*ldu];
155:     PetscCallBLAS("BLAStrmv",BLAStrmv_("U","C","N",&n,U,&lu,Q,&ione));
157:     *res *= BLASnrm2_(&n,Q,&ione);
158:   }

160:   PetscFPTrapPop();
161:   PetscFree5(W,Q,wr,wi,work);
162:   return 0;
163: }

165: #else

167: /*
168:    Compute the upper Cholesky factor of A
169:  */
170: static PetscErrorCode CholeskyFactor(PetscInt m,PetscScalar *A,PetscInt lda)
171: {
172:   PetscInt       i;
173:   PetscScalar    *S;
174:   PetscBLASInt   info,n,ld;

176:   PetscBLASIntCast(m,&n);
177:   PetscBLASIntCast(lda,&ld);
178:   PetscMalloc1(m*m,&S);
179:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);

181:   /* save a copy of matrix in S */
182:   for (i=0;i<m;i++) PetscArraycpy(S+i*m,A+i*lda,m);

184:   /* compute upper Cholesky factor in R */
185:   PetscCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n,A,&ld,&info));
186:   PetscLogFlops((1.0*n*n*n)/3.0);

188:   if (info) {
189:     for (i=0;i<m;i++) {
190:       PetscArraycpy(A+i*lda,S+i*m,m);
191:       A[i+i*lda] += 50.0*PETSC_MACHINE_EPSILON;
192:     }
193:     PetscCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n,A,&ld,&info));
194:     SlepcCheckLapackInfo("potrf",info);
195:     PetscLogFlops((1.0*n*n*n)/3.0);
196:   }

198:   /* Zero out entries below the diagonal */
199:   for (i=0;i<m-1;i++) PetscArrayzero(A+i*lda+i+1,m-i-1);
200:   PetscFPTrapPop();
201:   PetscFree(S);
202:   return 0;
203: }

205: /*
206:    HessLyapunovChol_LAPACK - alternative implementation when SLICOT is not available
207: */
208: static PetscErrorCode HessLyapunovChol_LAPACK(PetscInt m,PetscScalar *H,PetscInt ldh,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
209: {
210:   PetscBLASInt   ilo=1,lwork,info,n,kk,lu,lb,ione=1;
211:   PetscInt       i,j;
212:   PetscReal      scal;
213:   PetscScalar    *Q,*C,*W,*Z,*wr,*work,zero=0.0,done=1.0,dmone=-1.0;
214: #if !defined(PETSC_USE_COMPLEX)
215:   PetscScalar    *wi;
216: #endif

218:   PetscBLASIntCast(ldb,&lb);
219:   PetscBLASIntCast(ldu,&lu);
220:   PetscBLASIntCast(m,&n);
221:   PetscBLASIntCast(k,&kk);
222:   PetscBLASIntCast(6*m,&lwork);
223:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
224:   C = U;

226: #if !defined(PETSC_USE_COMPLEX)
227:   PetscMalloc6(m*m,&Q,m*m,&W,m*k,&Z,m,&wr,m,&wi,lwork,&work);
228: #else
229:   PetscMalloc5(m*m,&Q,m*m,&W,m*k,&Z,m,&wr,lwork,&work);
230: #endif

232:   /* save a copy W = H */
233:   for (j=0;j<m;j++) {
234:     for (i=0;i<m;i++) W[i+j*m] = H[i+j*ldh];
235:   }

237:   /* compute the (real) Schur form of W */
238: #if !defined(PETSC_USE_COMPLEX)
239:   PetscCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,W,&n,wr,wi,Q,&n,work,&lwork,&info));
240: #else
241:   PetscCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,W,&n,wr,Q,&n,work,&lwork,&info));
242: #endif
243:   SlepcCheckLapackInfo("hseqr",info);
244: #if defined(PETSC_USE_DEBUG)
246: #endif

248:   /* C = -Z*Z', Z = Q'*B */
249:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n,&kk,&n,&done,Q,&n,B,&lb,&zero,Z,&n));
250:   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&kk,&dmone,Z,&n,Z,&n,&zero,C,&lu));

252:   /* solve triangular Sylvester equation */
253:   PetscCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","C",&ione,&n,&n,W,&n,W,&n,C,&lu,&scal,&info));
254:   SlepcCheckLapackInfo("trsyl",info);

257:   /* back-transform C = Q*C*Q' */
258:   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,Q,&n,C,&n,&zero,W,&n));
259:   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,W,&n,Q,&n,&zero,C,&lu));

261:   /* resnorm = norm(H(m+1,:)*Y) */
262:   if (res) {
264:     *res *= BLASnrm2_(&n,C+m-1,&n);
265:   }

267:   /* U = chol(C) */
268:   CholeskyFactor(m,C,ldu);

270:   PetscFPTrapPop();
271: #if !defined(PETSC_USE_COMPLEX)
272:   PetscFree6(Q,W,Z,wr,wi,work);
273: #else
274:   PetscFree5(Q,W,Z,wr,work);
275: #endif
276:   return 0;
277: }

279: #endif /* SLEPC_HAVE_SLICOT */

281: /*@C
282:    LMEDenseHessLyapunovChol - Computes the Cholesky factor of the solution of a
283:    dense Lyapunov equation with an upper Hessenberg coefficient matrix.

285:    Logically Collective on lme

287:    Input Parameters:
288: +  lme - linear matrix equation solver context
289: .  m   - number of rows and columns of H
290: .  H   - coefficient matrix
291: .  ldh - leading dimension of H
292: .  k   - number of columns of B
293: .  B   - right-hand side matrix
294: .  ldb - leading dimension of B
295: -  ldu - leading dimension of U

297:    Output Parameters:
298: +  U   - Cholesky factor of the solution
299: -  res - (optional) residual norm, on input it should contain H(m+1,m)

301:    Note:
302:    The Lyapunov equation has the form H*X + X*H' = -B*B', where H is an mxm
303:    upper Hessenberg matrix, B is an mxk matrix and the solution is expressed
304:    as X = U'*U, where U is upper triangular. H is supposed to be stable.

306:    When k=1 and the res argument is provided, the last row of X is used to
307:    compute the residual norm of a Lyapunov equation projected via Arnoldi.

309:    Level: developer

311: .seealso: LMEDenseLyapunov(), LMESolve()
312: @*/
313: PetscErrorCode LMEDenseHessLyapunovChol(LME lme,PetscInt m,PetscScalar *H,PetscInt ldh,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
314: {
315: #if defined(PETSC_USE_INFO)
316:   PetscReal      error;
317: #endif


330: #if defined(SLEPC_HAVE_SLICOT)
331:   HessLyapunovChol_SLICOT(m,H,ldh,k,B,ldb,U,ldu,res);
332: #else
333:   HessLyapunovChol_LAPACK(m,H,ldh,k,B,ldb,U,ldu,res);
334: #endif

336: #if defined(PETSC_USE_INFO)
337:   if (PetscLogPrintInfo) {
338:     LyapunovCholResidual(m,H,ldh,k,B,ldb,U,ldu,&error);
339:     PetscInfo(lme,"Residual norm of dense Lyapunov equation = %g\n",(double)error);
340:   }
341: #endif
342:   return 0;
343: }

345: #if defined(SLEPC_HAVE_SLICOT)
346: /*
347:    Lyapunov_SLICOT - implementation used when SLICOT is available
348: */
349: static PetscErrorCode Lyapunov_SLICOT(PetscInt m,PetscScalar *H,PetscInt ldh,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx)
350: {
351:   PetscBLASInt   sdim,lwork,info,n,lx,*iwork;
352:   PetscInt       i,j;
353:   PetscReal      scal,sep,ferr,*work;
354:   PetscScalar    *Q,*W,*wr,*wi;

356:   PetscBLASIntCast(ldx,&lx);
357:   PetscBLASIntCast(m,&n);
358:   PetscBLASIntCast(PetscMax(20,m*m),&lwork);
359:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);

361:   /* transpose W = H' */
362:   PetscMalloc6(m*m,&W,m*m,&Q,m,&wr,m,&wi,m*m,&iwork,lwork,&work);
363:   for (j=0;j<m;j++) {
364:     for (i=0;i<m;i++) W[i+j*m] = H[j+i*ldh];
365:   }

367:   /* compute the real Schur form of W */
368:   PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,wi,Q,&n,work,&lwork,NULL,&info));
369:   SlepcCheckLapackInfo("gees",info);

371:   /* copy -B into X */
372:   for (i=0;i<m;i++) {
373:     for (j=0;j<m;j++) X[i+j*ldx] = -B[i+j*ldb];
374:   }

376:   /* solve Lyapunov equation (Hammarling) */
377:   PetscCallBLAS("SLICOTsb03md",SLICOTsb03md_("C","X","F","N",&n,W,&n,Q,&n,X,&lx,&scal,&sep,&ferr,wr,wi,iwork,work,&lwork,&info));

381:   PetscFPTrapPop();
382:   PetscFree6(W,Q,wr,wi,iwork,work);
383:   return 0;
384: }

386: #else

388: /*
389:    Lyapunov_LAPACK - alternative implementation when SLICOT is not available
390: */
391: static PetscErrorCode Lyapunov_LAPACK(PetscInt m,PetscScalar *A,PetscInt lda,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx)
392: {
393:   PetscBLASInt   sdim,lwork,info,n,lx,lb,ione=1;
394:   PetscInt       i,j;
395:   PetscReal      scal;
396:   PetscScalar    *Q,*W,*Z,*wr,*work,zero=0.0,done=1.0,dmone=-1.0;
397: #if defined(PETSC_USE_COMPLEX)
398:   PetscReal      *rwork;
399: #else
400:   PetscScalar    *wi;
401: #endif

403:   PetscBLASIntCast(ldb,&lb);
404:   PetscBLASIntCast(ldx,&lx);
405:   PetscBLASIntCast(m,&n);
406:   PetscBLASIntCast(6*m,&lwork);
407:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);

409: #if !defined(PETSC_USE_COMPLEX)
410:   PetscMalloc6(m*m,&Q,m*m,&W,m*m,&Z,m,&wr,m,&wi,lwork,&work);
411: #else
412:   PetscMalloc6(m*m,&Q,m*m,&W,m*m,&Z,m,&wr,lwork,&work,m,&rwork);
413: #endif

415:   /* save a copy W = A */
416:   for (j=0;j<m;j++) {
417:     for (i=0;i<m;i++) W[i+j*m] = A[i+j*lda];
418:   }

420:   /* compute the (real) Schur form of W */
421: #if !defined(PETSC_USE_COMPLEX)
422:   PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,wi,Q,&n,work,&lwork,NULL,&info));
423: #else
424:   PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,Q,&n,work,&lwork,rwork,NULL,&info));
425: #endif
426:   SlepcCheckLapackInfo("gees",info);

428:   /* X = -Q'*B*Q */
429:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n,&n,&n,&done,Q,&n,B,&lb,&zero,Z,&n));
430:   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&dmone,Z,&n,Q,&n,&zero,X,&lx));

432:   /* solve triangular Sylvester equation */
433:   PetscCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","C",&ione,&n,&n,W,&n,W,&n,X,&lx,&scal,&info));
434:   SlepcCheckLapackInfo("trsyl",info);

437:   /* back-transform X = Q*X*Q' */
438:   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,Q,&n,X,&n,&zero,W,&n));
439:   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,W,&n,Q,&n,&zero,X,&lx));

441:   PetscFPTrapPop();
442: #if !defined(PETSC_USE_COMPLEX)
443:   PetscFree6(Q,W,Z,wr,wi,work);
444: #else
445:   PetscFree6(Q,W,Z,wr,work,rwork);
446: #endif
447:   return 0;
448: }

450: #endif /* SLEPC_HAVE_SLICOT */

452: /*@C
453:    LMEDenseLyapunov - Computes the solution of a dense continuous-time Lyapunov
454:    equation.

456:    Logically Collective on lme

458:    Input Parameters:
459: +  lme - linear matrix equation solver context
460: .  m   - number of rows and columns of A
461: .  A   - coefficient matrix
462: .  lda - leading dimension of A
463: .  B   - right-hand side matrix
464: .  ldb - leading dimension of B
465: -  ldx - leading dimension of X

467:    Output Parameter:
468: .  X   - the solution

470:    Note:
471:    The Lyapunov equation has the form A*X + X*A' = -B, where all are mxm
472:    matrices, and B is symmetric.

474:    Level: developer

476: .seealso: LMEDenseHessLyapunovChol(), LMESolve()
477: @*/
478: PetscErrorCode LMEDenseLyapunov(LME lme,PetscInt m,PetscScalar *A,PetscInt lda,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx)
479: {
480: #if defined(PETSC_USE_INFO)
481:   PetscReal      error;
482: #endif


493: #if defined(SLEPC_HAVE_SLICOT)
494:   Lyapunov_SLICOT(m,A,lda,B,ldb,X,ldx);
495: #else
496:   Lyapunov_LAPACK(m,A,lda,B,ldb,X,ldx);
497: #endif

499: #if defined(PETSC_USE_INFO)
500:   if (PetscLogPrintInfo) {
501:     LyapunovResidual(m,A,lda,B,ldb,X,ldx,&error);
502:     PetscInfo(lme,"Residual norm of dense Lyapunov equation = %g\n",(double)error);
503:   }
504: #endif
505:   return 0;
506: }