Actual source code: nrefine.c
slepc-3.18.3 2023-03-24
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Newton refinement for polynomial eigenproblems.
13: References:
15: [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
16: of invariant pairs for matrix polynomials", Linear Algebra Appl.
17: 435(3):514-536, 2011.
19: [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
20: polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
21: 730-745, 2016.
22: */
24: #include <slepc/private/pepimpl.h>
25: #include <slepcblaslapack.h>
27: typedef struct {
28: Mat *A,M1;
29: BV V,M2,M3,W;
30: PetscInt k,nmat;
31: PetscScalar *fih,*work,*M4;
32: PetscBLASInt *pM4;
33: PetscBool compM1;
34: Vec t;
35: } PEP_REFINE_MATSHELL;
37: typedef struct {
38: Mat E[2],M1;
39: Vec tN,ttN,t1,vseq;
40: VecScatter scatterctx;
41: PetscBool compM1;
42: PetscInt *map0,*map1,*idxg,*idxp;
43: PetscSubcomm subc;
44: VecScatter scatter_sub;
45: VecScatter *scatter_id,*scatterp_id;
46: Mat *A;
47: BV V,W,M2,M3,Wt;
48: PetscScalar *M4,*w,*wt,*d,*dt;
49: Vec t,tg,Rv,Vi,tp,tpg;
50: PetscInt idx,*cols;
51: } PEP_REFINE_EXPLICIT;
53: static PetscErrorCode MatMult_FS(Mat M ,Vec x,Vec y)
54: {
55: PEP_REFINE_MATSHELL *ctx;
56: PetscInt k,i;
57: PetscScalar *c;
58: PetscBLASInt k_,one=1,info;
60: MatShellGetContext(M,&ctx);
61: VecCopy(x,ctx->t);
62: k = ctx->k;
63: c = ctx->work;
64: PetscBLASIntCast(k,&k_);
65: MatMult(ctx->M1,x,y);
66: VecConjugate(ctx->t);
67: BVDotVec(ctx->M3,ctx->t,c);
68: for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
69: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
70: PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
71: PetscFPTrapPop();
72: SlepcCheckLapackInfo("getrs",info);
73: BVMultVec(ctx->M2,-1.0,1.0,y,c);
74: return 0;
75: }
77: /*
78: Evaluates the first d elements of the polynomial basis
79: on a given matrix H which is considered to be triangular
80: */
81: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
82: {
83: PetscInt i,j,ldfh=nm*k,off,nmat=pep->nmat;
84: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
85: PetscScalar corr=0.0,alpha,beta;
86: PetscBLASInt k_,ldh_,ldfh_;
88: PetscBLASIntCast(ldh,&ldh_);
89: PetscBLASIntCast(k,&k_);
90: PetscBLASIntCast(ldfh,&ldfh_);
91: PetscArrayzero(fH,nm*k*k);
92: if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
93: if (nm>1) {
94: t = b[0]/a[0];
95: off = k;
96: for (j=0;j<k;j++) {
97: for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
98: fH[j+j*ldfh] -= t;
99: }
100: }
101: for (i=2;i<nm;i++) {
102: off = i*k;
103: if (i==2) {
104: for (j=0;j<k;j++) {
105: fH[off+j+j*ldfh] = 1.0;
106: H[j+j*ldh] -= b[1];
107: }
108: } else {
109: for (j=0;j<k;j++) {
110: PetscArraycpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k);
111: H[j+j*ldh] += corr-b[i-1];
112: }
113: }
114: corr = b[i-1];
115: beta = -g[i-1]/a[i-1];
116: alpha = 1/a[i-1];
117: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
118: }
119: for (j=0;j<k;j++) H[j+j*ldh] += corr;
120: return 0;
121: }
123: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PEP_REFINE_MATSHELL *ctx)
124: {
125: PetscScalar *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
126: const PetscScalar *m3,*m2;
127: PetscInt i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
128: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
129: PetscBLASInt k_,lda_,lds_,nloc_,one=1,info;
130: Mat *A=ctx->A,Mk,M1=ctx->M1,P;
131: BV V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
132: MatStructure str;
133: Vec vc;
135: STGetMatStructure(pep->st,&str);
136: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
137: DHii = T12;
138: PetscArrayzero(DHii,k*k*nmat);
139: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
140: for (d=2;d<nmat;d++) {
141: for (j=0;j<k;j++) {
142: for (i=0;i<k;i++) {
143: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
144: }
145: }
146: }
147: /* T11 */
148: if (!ctx->compM1) {
149: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
150: PEPEvaluateBasis(pep,h,0,Ts,NULL);
151: for (j=1;j<nmat;j++) MatAXPY(M1,Ts[j],A[j],str);
152: }
154: /* T22 */
155: PetscBLASIntCast(lds,&lds_);
156: PetscBLASIntCast(k,&k_);
157: PetscBLASIntCast(lda,&lda_);
158: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
159: for (i=1;i<deg;i++) {
160: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
161: s = (i==1)?0.0:1.0;
162: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
163: }
164: for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }
166: /* T12 */
167: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
168: for (i=1;i<nmat;i++) {
169: MatDenseGetArrayWrite(Mk,&array);
170: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
171: MatDenseRestoreArrayWrite(Mk,&array);
172: BVSetActiveColumns(W,0,k);
173: BVMult(W,1.0,0.0,V,Mk);
174: if (i==1) BVMatMult(W,A[i],M2);
175: else {
176: BVMatMult(W,A[i],M3); /* using M3 as work space */
177: BVMult(M2,1.0,1.0,M3,NULL);
178: }
179: }
181: /* T21 */
182: MatDenseGetArrayWrite(Mk,&array);
183: for (i=1;i<deg;i++) {
184: s = (i==1)?0.0:1.0;
185: ss = PetscConj(fh[i]);
186: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
187: }
188: MatDenseRestoreArrayWrite(Mk,&array);
189: BVSetActiveColumns(M3,0,k);
190: BVMult(M3,1.0,0.0,V,Mk);
191: for (i=0;i<k;i++) {
192: BVGetColumn(M3,i,&vc);
193: VecConjugate(vc);
194: BVRestoreColumn(M3,i,&vc);
195: }
196: MatDestroy(&Mk);
197: PetscFree3(T12,Tr,Ts);
199: VecGetLocalSize(ctx->t,&nloc);
200: PetscBLASIntCast(nloc,&nloc_);
201: PetscMalloc1(nloc*k,&T);
202: KSPGetOperators(pep->refineksp,NULL,&P);
203: if (!ctx->compM1) MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN);
204: BVGetArrayRead(ctx->M2,&m2);
205: BVGetArrayRead(ctx->M3,&m3);
206: VecGetArray(ctx->t,&v);
207: for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
208: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
209: PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
210: PetscFPTrapPop();
211: SlepcCheckLapackInfo("gesv",info);
212: for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
213: VecRestoreArray(ctx->t,&v);
214: BVRestoreArrayRead(ctx->M2,&m2);
215: BVRestoreArrayRead(ctx->M3,&m3);
216: MatDiagonalSet(P,ctx->t,ADD_VALUES);
217: PetscFree(T);
218: KSPSetUp(pep->refineksp);
219: return 0;
220: }
222: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
223: {
224: PetscScalar *t0;
225: PetscBLASInt k_,one=1,info,lda_;
226: PetscInt i,lda=nmat*k;
227: Mat M;
228: PEP_REFINE_MATSHELL *ctx;
230: KSPGetOperators(ksp,&M,NULL);
231: MatShellGetContext(M,&ctx);
232: PetscCalloc1(k,&t0);
233: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
234: PetscBLASIntCast(lda,&lda_);
235: PetscBLASIntCast(k,&k_);
236: for (i=0;i<k;i++) t0[i] = Rh[i];
237: PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
238: SlepcCheckLapackInfo("getrs",info);
239: BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
240: KSPSolve(ksp,Rv,dVi);
241: VecConjugate(dVi);
242: BVDotVec(ctx->M3,dVi,dHi);
243: VecConjugate(dVi);
244: for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
245: PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
246: SlepcCheckLapackInfo("getrs",info);
247: PetscFPTrapPop();
248: PetscFree(t0);
249: return 0;
250: }
252: /*
253: Computes the residual P(H,V*S)*e_j for the polynomial
254: */
255: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
256: {
257: PetscScalar *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
258: PetscReal *a=pcf,*b=pcf+nmat,*g=b+nmat;
259: PetscInt i,ii,jj,lda;
260: PetscBLASInt lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
261: Mat M0;
262: Vec w;
264: PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
265: lda = k*nmat;
266: PetscBLASIntCast(k,&k_);
267: PetscBLASIntCast(lds,&lds_);
268: PetscBLASIntCast(lda,&lda_);
269: PetscBLASIntCast(nmat,&nmat_);
270: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
271: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
272: BVSetActiveColumns(W,0,nmat);
273: BVMult(W,1.0,0.0,V,M0);
274: MatDestroy(&M0);
276: BVGetColumn(W,0,&w);
277: MatMult(A[0],w,Rv);
278: BVRestoreColumn(W,0,&w);
279: for (i=1;i<nmat;i++) {
280: BVGetColumn(W,i,&w);
281: MatMult(A[i],w,t);
282: BVRestoreColumn(W,i,&w);
283: VecAXPY(Rv,1.0,t);
284: }
285: /* Update right-hand side */
286: if (j) {
287: PetscBLASIntCast(ldh,&ldh_);
288: PetscArrayzero(Z,k*k);
289: PetscArrayzero(DS0,k*k);
290: PetscArraycpy(Z+(j-1)*k,dH+(j-1)*k,k);
291: /* Update DfH */
292: for (i=1;i<nmat;i++) {
293: if (i>1) {
294: beta = -g[i-1];
295: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
296: tt += -b[i-1];
297: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
298: tt = b[i-1];
299: beta = 1.0/a[i-1];
300: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
301: F = DS0; DS0 = DS1; DS1 = F;
302: } else {
303: PetscArrayzero(DS1,k*k);
304: for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
305: }
306: for (jj=j;jj<k;jj++) {
307: for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
308: }
309: }
310: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
311: /* Update right-hand side */
312: PetscBLASIntCast(2*k,&k2_);
313: PetscBLASIntCast(j,&j_);
314: PetscBLASIntCast(k+rds,&krds_);
315: c0 = DS0;
316: PetscArrayzero(Rh,k);
317: for (i=0;i<nmat;i++) {
318: PetscCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
319: PetscCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
320: BVMultVec(V,1.0,0.0,t,h);
321: BVSetActiveColumns(dV,0,rds);
322: BVMultVec(dV,1.0,1.0,t,h+k);
323: BVGetColumn(W,i,&w);
324: MatMult(A[i],t,w);
325: BVRestoreColumn(W,i,&w);
326: if (i>0 && i<nmat-1) {
327: PetscCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
328: PetscCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
329: }
330: }
332: for (i=0;i<nmat;i++) h[i] = -1.0;
333: BVMultVec(W,1.0,1.0,Rv,h);
334: }
335: PetscFree4(h,DS0,DS1,Z);
336: return 0;
337: }
339: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
340: {
341: PetscInt i,j,incf,incc;
342: PetscScalar *y,*g,*xx2,*ww,y2,*dd;
343: Vec v,t,xx1;
344: BV WW,T;
346: PetscMalloc3(sz,&y,sz,&g,k,&xx2);
347: if (trans) {
348: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
349: } else {
350: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
351: }
352: xx1 = vw;
353: VecCopy(x1,xx1);
354: PetscArraycpy(xx2,x2,sz);
355: PetscArrayzero(sol2,k);
356: for (i=sz-1;i>=0;i--) {
357: BVGetColumn(WW,i,&v);
358: VecConjugate(v);
359: VecDot(xx1,v,y+i);
360: VecConjugate(v);
361: BVRestoreColumn(WW,i,&v);
362: for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
363: y[i] = -(y[i]-xx2[i])/dd[i];
364: BVGetColumn(T,i,&t);
365: VecAXPY(xx1,-y[i],t);
366: BVRestoreColumn(T,i,&t);
367: for (j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
368: g[i] = xx2[i];
369: }
370: if (trans) KSPSolveTranspose(ksp,xx1,sol1);
371: else KSPSolve(ksp,xx1,sol1);
372: if (trans) {
373: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
374: } else {
375: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
376: }
377: for (i=0;i<sz;i++) {
378: BVGetColumn(T,i,&t);
379: VecConjugate(t);
380: VecDot(sol1,t,&y2);
381: VecConjugate(t);
382: BVRestoreColumn(T,i,&t);
383: for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
384: y2 = (g[i]-y2)/dd[i];
385: BVGetColumn(WW,i,&v);
386: VecAXPY(sol1,-y2,v);
387: for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
388: sol2[i] = y[i]+y2;
389: BVRestoreColumn(WW,i,&v);
390: }
391: PetscFree3(y,g,xx2);
392: return 0;
393: }
395: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx)
396: {
397: PetscInt i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
398: Mat M1=matctx->M1,*A,*At,Mk;
399: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
400: PetscScalar s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
401: PetscScalar *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
402: PetscBLASInt lds_,lda_,k_;
403: MatStructure str;
404: PetscBool flg;
405: BV M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
406: Vec vc,vc2;
408: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
409: STGetMatStructure(pep->st,&str);
410: STGetTransform(pep->st,&flg);
411: if (flg) {
412: PetscMalloc1(pep->nmat,&At);
413: for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&At[i]);
414: } else At = pep->A;
415: if (matctx->subc) A = matctx->A;
416: else A = At;
417: /* Form the explicit system matrix */
418: DHii = T12;
419: PetscArrayzero(DHii,k*k*nmat);
420: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
421: for (l=2;l<nmat;l++) {
422: for (j=0;j<k;j++) {
423: for (i=0;i<k;i++) {
424: DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
425: }
426: }
427: }
429: /* T11 */
430: if (!matctx->compM1) {
431: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
432: PEPEvaluateBasis(pep,h,0,Ts,NULL);
433: for (j=1;j<nmat;j++) MatAXPY(M1,Ts[j],A[j],str);
434: }
436: /* T22 */
437: PetscBLASIntCast(lds,&lds_);
438: PetscBLASIntCast(k,&k_);
439: PetscBLASIntCast(lda,&lda_);
440: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
441: for (i=1;i<deg;i++) {
442: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
443: s = (i==1)?0.0:1.0;
444: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
445: }
447: /* T12 */
448: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
449: for (i=1;i<nmat;i++) {
450: MatDenseGetArrayWrite(Mk,&array);
451: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
452: MatDenseRestoreArrayWrite(Mk,&array);
453: BVSetActiveColumns(W,0,k);
454: BVMult(W,1.0,0.0,V,Mk);
455: if (i==1) BVMatMult(W,A[i],M2);
456: else {
457: BVMatMult(W,A[i],M3); /* using M3 as work space */
458: BVMult(M2,1.0,1.0,M3,NULL);
459: }
460: }
462: /* T21 */
463: MatDenseGetArrayWrite(Mk,&array);
464: for (i=1;i<deg;i++) {
465: s = (i==1)?0.0:1.0;
466: ss = PetscConj(fh[i]);
467: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
468: }
469: MatDenseRestoreArrayWrite(Mk,&array);
470: BVSetActiveColumns(M3,0,k);
471: BVMult(M3,1.0,0.0,V,Mk);
472: for (i=0;i<k;i++) {
473: BVGetColumn(M3,i,&vc);
474: VecConjugate(vc);
475: BVRestoreColumn(M3,i,&vc);
476: }
478: PEP_KSPSetOperators(ksp,M1,M1);
479: KSPSetUp(ksp);
480: MatDestroy(&Mk);
482: /* Set up for BEMW */
483: for (i=0;i<k;i++) {
484: BVGetColumn(M2,i,&vc);
485: BVGetColumn(W,i,&vc2);
486: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
487: BVRestoreColumn(M2,i,&vc);
488: BVGetColumn(M3,i,&vc);
489: VecConjugate(vc);
490: VecDot(vc2,vc,&d[i]);
491: VecConjugate(vc);
492: BVRestoreColumn(M3,i,&vc);
493: for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
494: d[i] = M4[i+i*k]-d[i];
495: BVRestoreColumn(W,i,&vc2);
497: BVGetColumn(M3,i,&vc);
498: BVGetColumn(Wt,i,&vc2);
499: for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
500: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
501: BVRestoreColumn(M3,i,&vc);
502: BVGetColumn(M2,i,&vc);
503: VecConjugate(vc2);
504: VecDot(vc,vc2,&dt[i]);
505: VecConjugate(vc2);
506: BVRestoreColumn(M2,i,&vc);
507: for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
508: dt[i] = M4[i+i*k]-dt[i];
509: BVRestoreColumn(Wt,i,&vc2);
510: }
512: if (flg) PetscFree(At);
513: PetscFree3(T12,Tr,Ts);
514: return 0;
515: }
517: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx,BV W)
518: {
519: PetscInt i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
520: PetscInt *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
521: Mat M,*E=matctx->E,*A,*At,Mk,Md;
522: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
523: PetscScalar s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
524: PetscBLASInt lds_,lda_,k_;
525: const PetscInt *idxmc;
526: const PetscScalar *valsc,*carray;
527: MatStructure str;
528: Vec vc,vc0;
529: PetscBool flg;
531: PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
532: STGetMatStructure(pep->st,&str);
533: KSPGetOperators(ksp,&M,NULL);
534: MatGetOwnershipRange(E[1],&n1,&m1);
535: MatGetOwnershipRange(E[0],&n0,&m0);
536: MatGetOwnershipRange(M,&n,NULL);
537: PetscMalloc1(nmat,&ts);
538: STGetTransform(pep->st,&flg);
539: if (flg) {
540: PetscMalloc1(pep->nmat,&At);
541: for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&At[i]);
542: } else At = pep->A;
543: if (matctx->subc) A = matctx->A;
544: else A = At;
545: /* Form the explicit system matrix */
546: DHii = T12;
547: PetscArrayzero(DHii,k*k*nmat);
548: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
549: for (d=2;d<nmat;d++) {
550: for (j=0;j<k;j++) {
551: for (i=0;i<k;i++) {
552: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
553: }
554: }
555: }
557: /* T11 */
558: if (!matctx->compM1) {
559: MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
560: PEPEvaluateBasis(pep,h,0,Ts,NULL);
561: for (j=1;j<nmat;j++) MatAXPY(E[0],Ts[j],A[j],str);
562: }
563: for (i=n0;i<m0;i++) {
564: MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
565: idx = n+i-n0;
566: for (j=0;j<ncols;j++) {
567: idxg[j] = matctx->map0[idxmc[j]];
568: }
569: MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
570: MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
571: }
573: /* T22 */
574: PetscBLASIntCast(lds,&lds_);
575: PetscBLASIntCast(k,&k_);
576: PetscBLASIntCast(lda,&lda_);
577: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
578: for (i=1;i<deg;i++) {
579: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
580: s = (i==1)?0.0:1.0;
581: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
582: }
583: for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
584: for (i=0;i<m1-n1;i++) {
585: idx = n+m0-n0+i;
586: for (j=0;j<k;j++) {
587: Tr[j] = T22[n1+i+j*k];
588: }
589: MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
590: }
592: /* T21 */
593: for (i=1;i<deg;i++) {
594: s = (i==1)?0.0:1.0;
595: ss = PetscConj(fh[i]);
596: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
597: }
598: BVSetActiveColumns(W,0,k);
599: MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
600: BVMult(W,1.0,0.0,V,Mk);
601: for (i=0;i<k;i++) {
602: BVGetColumn(W,i,&vc);
603: VecConjugate(vc);
604: VecGetArrayRead(vc,&carray);
605: idx = matctx->map1[i];
606: MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
607: VecRestoreArrayRead(vc,&carray);
608: BVRestoreColumn(W,i,&vc);
609: }
611: /* T12 */
612: for (i=1;i<nmat;i++) {
613: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
614: for (j=0;j<k;j++) PetscArraycpy(T12+i*k+j*lda,Ts+j*k,k);
615: }
616: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
617: for (i=0;i<nmat;i++) ts[i] = 1.0;
618: for (j=0;j<k;j++) {
619: MatDenseGetArrayWrite(Md,&array);
620: PetscArraycpy(array,T12+k+j*lda,(nmat-1)*k);
621: MatDenseRestoreArrayWrite(Md,&array);
622: BVSetActiveColumns(W,0,nmat-1);
623: BVMult(W,1.0,0.0,V,Md);
624: for (i=nmat-1;i>0;i--) {
625: BVGetColumn(W,i-1,&vc0);
626: BVGetColumn(W,i,&vc);
627: MatMult(A[i],vc0,vc);
628: BVRestoreColumn(W,i-1,&vc0);
629: BVRestoreColumn(W,i,&vc);
630: }
631: BVSetActiveColumns(W,1,nmat);
632: BVGetColumn(W,0,&vc0);
633: BVMultVec(W,1.0,0.0,vc0,ts);
634: VecGetArrayRead(vc0,&carray);
635: idx = matctx->map1[j];
636: MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
637: VecRestoreArrayRead(vc0,&carray);
638: BVRestoreColumn(W,0,&vc0);
639: }
640: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
641: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
642: PEP_KSPSetOperators(ksp,M,M);
643: KSPSetUp(ksp);
644: PetscFree(ts);
645: MatDestroy(&Mk);
646: MatDestroy(&Md);
647: if (flg) PetscFree(At);
648: PetscFree5(T22,T21,T12,Tr,Ts);
649: return 0;
650: }
652: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx)
653: {
654: PetscInt n0,m0,n1,m1,i;
655: PetscScalar *arrayV;
656: const PetscScalar *array;
658: MatGetOwnershipRange(matctx->E[1],&n1,&m1);
659: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
661: /* Right side */
662: VecGetArrayRead(Rv,&array);
663: VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
664: VecRestoreArrayRead(Rv,&array);
665: VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
666: VecAssemblyBegin(matctx->tN);
667: VecAssemblyEnd(matctx->tN);
669: /* Solve */
670: KSPSolve(ksp,matctx->tN,matctx->ttN);
672: /* Retrieve solution */
673: VecGetArray(dVi,&arrayV);
674: VecGetArrayRead(matctx->ttN,&array);
675: PetscArraycpy(arrayV,array,m0-n0);
676: VecRestoreArray(dVi,&arrayV);
677: if (!matctx->subc) {
678: VecGetArray(matctx->t1,&arrayV);
679: for (i=0;i<m1-n1;i++) arrayV[i] = array[m0-n0+i];
680: VecRestoreArray(matctx->t1,&arrayV);
681: VecRestoreArrayRead(matctx->ttN,&array);
682: VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
683: VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
684: VecGetArrayRead(matctx->vseq,&array);
685: for (i=0;i<k;i++) dHi[i] = array[i];
686: VecRestoreArrayRead(matctx->vseq,&array);
687: }
688: return 0;
689: }
691: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx,BV W)
692: {
693: PetscInt j,m,lda=pep->nmat*k,n0,m0,idx;
694: PetscMPIInt root,len;
695: PetscScalar *array2,h;
696: const PetscScalar *array;
697: Vec R,Vi;
698: PEP_REFINE_MATSHELL *ctx;
699: Mat M;
701: if (!matctx || !matctx->subc) {
702: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
703: h = H[i+i*ldh];
704: idx = i;
705: R = Rv;
706: Vi = dVi;
707: switch (pep->scheme) {
708: case PEP_REFINE_SCHEME_EXPLICIT:
709: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
710: matctx->compM1 = PETSC_FALSE;
711: break;
712: case PEP_REFINE_SCHEME_MBE:
713: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
714: matctx->compM1 = PETSC_FALSE;
715: break;
716: case PEP_REFINE_SCHEME_SCHUR:
717: KSPGetOperators(ksp,&M,NULL);
718: MatShellGetContext(M,&ctx);
719: NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
720: ctx->compM1 = PETSC_FALSE;
721: break;
722: }
723: } else {
724: if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
725: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
726: h = H[idx+idx*ldh];
727: matctx->idx = idx;
728: switch (pep->scheme) {
729: case PEP_REFINE_SCHEME_EXPLICIT:
730: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
731: matctx->compM1 = PETSC_FALSE;
732: break;
733: case PEP_REFINE_SCHEME_MBE:
734: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
735: matctx->compM1 = PETSC_FALSE;
736: break;
737: case PEP_REFINE_SCHEME_SCHUR:
738: break;
739: }
740: } else idx = matctx->idx;
741: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
742: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
743: VecGetArrayRead(matctx->tg,&array);
744: VecPlaceArray(matctx->t,array);
745: VecCopy(matctx->t,matctx->Rv);
746: VecResetArray(matctx->t);
747: VecRestoreArrayRead(matctx->tg,&array);
748: R = matctx->Rv;
749: Vi = matctx->Vi;
750: }
751: if (idx==i && idx<k) {
752: switch (pep->scheme) {
753: case PEP_REFINE_SCHEME_EXPLICIT:
754: NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
755: break;
756: case PEP_REFINE_SCHEME_MBE:
757: NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
758: break;
759: case PEP_REFINE_SCHEME_SCHUR:
760: NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
761: break;
762: }
763: }
764: if (matctx && matctx->subc) {
765: VecGetLocalSize(Vi,&m);
766: VecGetArrayRead(Vi,&array);
767: VecGetArray(matctx->tg,&array2);
768: PetscArraycpy(array2,array,m);
769: VecRestoreArray(matctx->tg,&array2);
770: VecRestoreArrayRead(Vi,&array);
771: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
772: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
773: switch (pep->scheme) {
774: case PEP_REFINE_SCHEME_EXPLICIT:
775: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
776: VecGetArrayRead(matctx->ttN,&array);
777: VecPlaceArray(matctx->tp,array+m0-n0);
778: VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
779: VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
780: VecResetArray(matctx->tp);
781: VecRestoreArrayRead(matctx->ttN,&array);
782: VecGetArrayRead(matctx->tpg,&array);
783: for (j=0;j<k;j++) dHi[j] = array[j];
784: VecRestoreArrayRead(matctx->tpg,&array);
785: break;
786: case PEP_REFINE_SCHEME_MBE:
787: root = 0;
788: for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
789: PetscMPIIntCast(k,&len);
790: MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
791: break;
792: case PEP_REFINE_SCHEME_SCHUR:
793: break;
794: }
795: }
796: return 0;
797: }
799: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PEP_REFINE_EXPLICIT *matctx)
800: {
801: PetscInt i,nmat=pep->nmat,lda=nmat*k;
802: PetscScalar *fh,*Rh,*DfH;
803: PetscReal norm;
804: BV W;
805: Vec Rv,t,dvi;
806: PEP_REFINE_MATSHELL *ctx;
807: Mat M,*At;
808: PetscBool flg,lindep;
810: PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
811: *rds = 0;
812: BVCreateVec(pep->V,&Rv);
813: switch (pep->scheme) {
814: case PEP_REFINE_SCHEME_EXPLICIT:
815: BVCreateVec(pep->V,&t);
816: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
817: PetscMalloc1(nmat,&fh);
818: break;
819: case PEP_REFINE_SCHEME_MBE:
820: if (matctx->subc) {
821: BVCreateVec(pep->V,&t);
822: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
823: } else {
824: W = matctx->W;
825: PetscObjectReference((PetscObject)W);
826: t = matctx->t;
827: PetscObjectReference((PetscObject)t);
828: }
829: BVScale(matctx->W,0.0);
830: BVScale(matctx->Wt,0.0);
831: BVScale(matctx->M2,0.0);
832: BVScale(matctx->M3,0.0);
833: PetscMalloc1(nmat,&fh);
834: break;
835: case PEP_REFINE_SCHEME_SCHUR:
836: KSPGetOperators(ksp,&M,NULL);
837: MatShellGetContext(M,&ctx);
838: BVCreateVec(pep->V,&t);
839: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
840: fh = ctx->fih;
841: break;
842: }
843: PetscArrayzero(dVS,2*k*k);
844: PetscArrayzero(DfH,lda*k);
845: STGetTransform(pep->st,&flg);
846: if (flg) {
847: PetscMalloc1(pep->nmat,&At);
848: for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&At[i]);
849: } else At = pep->A;
851: /* Main loop for computing the i-th columns of dX and dS */
852: for (i=0;i<k;i++) {
853: /* Compute and update i-th column of the right hand side */
854: PetscArrayzero(Rh,k);
855: NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);
857: /* Update and solve system */
858: BVGetColumn(dV,i,&dvi);
859: NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
860: /* Orthogonalize computed solution */
861: BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
862: BVRestoreColumn(dV,i,&dvi);
863: if (!lindep) {
864: BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
865: if (!lindep) {
866: dVS[k+i+i*2*k] = norm;
867: BVScaleColumn(dV,i,1.0/norm);
868: (*rds)++;
869: }
870: }
871: }
872: BVSetActiveColumns(dV,0,*rds);
873: VecDestroy(&t);
874: VecDestroy(&Rv);
875: BVDestroy(&W);
876: if (flg) PetscFree(At);
877: PetscFree2(DfH,Rh);
878: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) PetscFree(fh);
879: return 0;
880: }
882: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
883: {
884: PetscInt j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
885: PetscScalar *G,*tau,sone=1.0,zero=0.0,*work;
886: PetscBLASInt lds_,k_,ldh_,info,ldg_,lda_;
888: PetscMalloc3(k,&tau,k,&work,deg*k*k,&G);
889: PetscBLASIntCast(lds,&lds_);
890: PetscBLASIntCast(lda,&lda_);
891: PetscBLASIntCast(k,&k_);
893: /* Form auxiliary matrix for the orthogonalization step */
894: ldg = deg*k;
895: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
896: PetscBLASIntCast(ldg,&ldg_);
897: PetscBLASIntCast(ldh,&ldh_);
898: for (j=0;j<deg;j++) {
899: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
900: }
901: /* Orthogonalize and update S */
902: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
903: PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
904: PetscFPTrapPop();
905: SlepcCheckLapackInfo("geqrf",info);
906: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));
908: /* Update H */
909: PetscCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
910: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
911: PetscFree3(tau,work,G);
912: return 0;
913: }
915: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
916: {
917: PetscInt i,j,nmat=pep->nmat,lda=nmat*k;
918: PetscScalar *tau,*array,*work;
919: PetscBLASInt lds_,k_,lda_,ldh_,kdrs_,info,k2_;
920: Mat M0;
922: PetscMalloc2(k,&tau,k,&work);
923: PetscBLASIntCast(lds,&lds_);
924: PetscBLASIntCast(lda,&lda_);
925: PetscBLASIntCast(ldh,&ldh_);
926: PetscBLASIntCast(k,&k_);
927: PetscBLASIntCast(2*k,&k2_);
928: PetscBLASIntCast((k+rds),&kdrs_);
929: /* Update H */
930: for (j=0;j<k;j++) {
931: for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
932: }
933: /* Update V */
934: for (j=0;j<k;j++) {
935: for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
936: for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
937: }
938: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
939: PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
940: SlepcCheckLapackInfo("geqrf",info);
941: /* Copy triangular matrix in S */
942: for (j=0;j<k;j++) {
943: for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
944: for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
945: }
946: PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
947: SlepcCheckLapackInfo("orgqr",info);
948: PetscFPTrapPop();
949: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
950: MatDenseGetArrayWrite(M0,&array);
951: for (j=0;j<k;j++) PetscArraycpy(array+j*k,dVS+j*2*k,k);
952: MatDenseRestoreArrayWrite(M0,&array);
953: BVMultInPlace(pep->V,M0,0,k);
954: if (rds) {
955: MatDenseGetArrayWrite(M0,&array);
956: for (j=0;j<k;j++) PetscArraycpy(array+j*k,dVS+k+j*2*k,rds);
957: MatDenseRestoreArrayWrite(M0,&array);
958: BVMultInPlace(dV,M0,0,k);
959: BVMult(pep->V,1.0,1.0,dV,NULL);
960: }
961: MatDestroy(&M0);
962: NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
963: PetscFree2(tau,work);
964: return 0;
965: }
967: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PEP_REFINE_EXPLICIT *matctx,PetscBool ini)
968: {
969: PEP_REFINE_MATSHELL *ctx;
970: PetscScalar t,*coef;
971: const PetscScalar *array;
972: MatStructure str;
973: PetscInt j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
974: MPI_Comm comm;
975: PetscMPIInt np;
976: const PetscInt *rgs0,*rgs1;
977: Mat B,C,*E,*A,*At;
978: IS is1,is2;
979: Vec v;
980: PetscBool flg;
981: Mat M,P;
983: PetscMalloc1(nmat,&coef);
984: STGetTransform(pep->st,&flg);
985: if (flg) {
986: PetscMalloc1(pep->nmat,&At);
987: for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&At[i]);
988: } else At = pep->A;
989: switch (pep->scheme) {
990: case PEP_REFINE_SCHEME_EXPLICIT:
991: if (ini) {
992: if (matctx->subc) {
993: A = matctx->A;
994: PetscSubcommGetChild(matctx->subc,&comm);
995: } else {
996: A = At;
997: PetscObjectGetComm((PetscObject)pep,&comm);
998: }
999: E = matctx->E;
1000: STGetMatStructure(pep->st,&str);
1001: MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1002: j = (matctx->subc)?matctx->subc->color:0;
1003: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1004: for (j=1;j<nmat;j++) MatAXPY(E[0],coef[j],A[j],str);
1005: MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1006: MatGetOwnershipRange(E[0],&n0,&m0);
1007: MatGetOwnershipRange(E[1],&n1,&m1);
1008: MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1009: MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1010: /* T12 and T21 are computed from V and V*, so,
1011: they must have the same column and row ranges */
1013: MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1014: MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1015: MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1016: MatDestroy(&B);
1017: MatDestroy(&C);
1018: matctx->compM1 = PETSC_TRUE;
1019: MatGetSize(E[0],NULL,&N0);
1020: MatGetSize(E[1],NULL,&N1);
1021: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1022: MatGetOwnershipRanges(E[0],&rgs0);
1023: MatGetOwnershipRanges(E[1],&rgs1);
1024: PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1025: /* Create column (and row) mapping */
1026: for (p=0;p<np;p++) {
1027: for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1028: for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1029: }
1030: MatCreateVecs(M,NULL,&matctx->tN);
1031: MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1032: VecDuplicate(matctx->tN,&matctx->ttN);
1033: if (matctx->subc) {
1034: MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1035: count = np*k;
1036: PetscMalloc2(count,&idx1,count,&idx2);
1037: VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1038: VecGetOwnershipRange(matctx->tp,&l0,NULL);
1039: VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1040: for (si=0;si<matctx->subc->n;si++) {
1041: if (matctx->subc->color==si) {
1042: j=0;
1043: if (matctx->subc->color==si) {
1044: for (p=0;p<np;p++) {
1045: for (i=n1;i<m1;i++) {
1046: idx1[j] = l0+i-n1;
1047: idx2[j++] =p*k+i;
1048: }
1049: }
1050: }
1051: count = np*(m1-n1);
1052: } else count =0;
1053: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1054: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1055: VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1056: ISDestroy(&is1);
1057: ISDestroy(&is2);
1058: }
1059: PetscFree2(idx1,idx2);
1060: } else VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1061: P = M;
1062: } else {
1063: if (matctx->subc) {
1064: /* Scatter vectors pep->V */
1065: for (i=0;i<k;i++) {
1066: BVGetColumn(pep->V,i,&v);
1067: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1068: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1069: BVRestoreColumn(pep->V,i,&v);
1070: VecGetArrayRead(matctx->tg,&array);
1071: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1072: BVInsertVec(matctx->V,i,matctx->t);
1073: VecResetArray(matctx->t);
1074: VecRestoreArrayRead(matctx->tg,&array);
1075: }
1076: }
1077: }
1078: break;
1079: case PEP_REFINE_SCHEME_MBE:
1080: if (ini) {
1081: if (matctx->subc) {
1082: A = matctx->A;
1083: PetscSubcommGetChild(matctx->subc,&comm);
1084: } else {
1085: matctx->V = pep->V;
1086: A = At;
1087: PetscObjectGetComm((PetscObject)pep,&comm);
1088: MatCreateVecs(pep->A[0],&matctx->t,NULL);
1089: }
1090: STGetMatStructure(pep->st,&str);
1091: MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1092: j = (matctx->subc)?matctx->subc->color:0;
1093: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1094: for (j=1;j<nmat;j++) MatAXPY(matctx->M1,coef[j],A[j],str);
1095: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1096: BVDuplicateResize(matctx->V,k,&matctx->M2);
1097: BVDuplicate(matctx->M2,&matctx->M3);
1098: BVDuplicate(matctx->M2,&matctx->Wt);
1099: PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1100: matctx->compM1 = PETSC_TRUE;
1101: M = matctx->M1;
1102: P = M;
1103: }
1104: break;
1105: case PEP_REFINE_SCHEME_SCHUR:
1106: if (ini) {
1107: PetscObjectGetComm((PetscObject)pep,&comm);
1108: MatGetSize(At[0],&m0,&n0);
1109: PetscMalloc1(1,&ctx);
1110: STGetMatStructure(pep->st,&str);
1111: /* Create a shell matrix to solve the linear system */
1112: ctx->V = pep->V;
1113: ctx->k = k; ctx->nmat = nmat;
1114: PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1115: for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1116: PetscArrayzero(ctx->M4,k*k);
1117: MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1118: MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatMult_FS);
1119: BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1120: BVDuplicateResize(ctx->V,k,&ctx->M2);
1121: BVDuplicate(ctx->M2,&ctx->M3);
1122: BVCreateVec(pep->V,&ctx->t);
1123: MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1124: PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1125: for (j=1;j<nmat;j++) MatAXPY(ctx->M1,coef[j],At[j],str);
1126: MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1127: /* Compute a precond matrix for the system */
1128: t = H[0];
1129: PEPEvaluateBasis(pep,t,0,coef,NULL);
1130: for (j=1;j<nmat;j++) MatAXPY(P,coef[j],At[j],str);
1131: ctx->compM1 = PETSC_TRUE;
1132: }
1133: break;
1134: }
1135: if (ini) {
1136: PEPRefineGetKSP(pep,&pep->refineksp);
1137: KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE);
1138: PEP_KSPSetOperators(pep->refineksp,M,P);
1139: KSPSetFromOptions(pep->refineksp);
1140: }
1142: if (!ini && matctx && matctx->subc) {
1143: /* Scatter vectors pep->V */
1144: for (i=0;i<k;i++) {
1145: BVGetColumn(pep->V,i,&v);
1146: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1147: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1148: BVRestoreColumn(pep->V,i,&v);
1149: VecGetArrayRead(matctx->tg,&array);
1150: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1151: BVInsertVec(matctx->V,i,matctx->t);
1152: VecResetArray(matctx->t);
1153: VecRestoreArrayRead(matctx->tg,&array);
1154: }
1155: }
1156: PetscFree(coef);
1157: if (flg) PetscFree(At);
1158: return 0;
1159: }
1161: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,PEP_REFINE_EXPLICIT *matctx,PetscInt nsubc)
1162: {
1163: PetscInt i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1164: IS is1,is2;
1165: BVType type;
1166: Vec v;
1167: const PetscScalar *array;
1168: Mat *A;
1169: PetscBool flg;
1170: MPI_Comm contpar,child;
1172: STGetTransform(pep->st,&flg);
1173: if (flg) {
1174: PetscMalloc1(pep->nmat,&A);
1175: for (i=0;i<pep->nmat;i++) STGetMatrixTransformed(pep->st,i,&A[i]);
1176: } else A = pep->A;
1177: PetscSubcommGetChild(matctx->subc,&child);
1178: PetscSubcommGetContiguousParent(matctx->subc,&contpar);
1180: /* Duplicate pep matrices */
1181: PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1182: for (i=0;i<pep->nmat;i++) MatCreateRedundantMatrix(A[i],0,child,MAT_INITIAL_MATRIX,&matctx->A[i]);
1184: /* Create Scatter */
1185: MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1186: MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1187: VecCreateMPI(contpar,nloc_sub,PETSC_DECIDE,&matctx->tg);
1188: BVGetColumn(pep->V,0,&v);
1189: VecGetOwnershipRange(v,&n0,&m0);
1190: nloc0 = m0-n0;
1191: PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1192: j = 0;
1193: for (si=0;si<matctx->subc->n;si++) {
1194: for (i=n0;i<m0;i++) {
1195: idx1[j] = i;
1196: idx2[j++] = i+pep->n*si;
1197: }
1198: }
1199: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1200: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1201: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1202: ISDestroy(&is1);
1203: ISDestroy(&is2);
1204: for (si=0;si<matctx->subc->n;si++) {
1205: j=0;
1206: for (i=n0;i<m0;i++) {
1207: idx1[j] = i;
1208: idx2[j++] = i+pep->n*si;
1209: }
1210: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1211: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1212: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1213: ISDestroy(&is1);
1214: ISDestroy(&is2);
1215: }
1216: BVRestoreColumn(pep->V,0,&v);
1217: PetscFree2(idx1,idx2);
1219: /* Duplicate pep->V vecs */
1220: BVGetType(pep->V,&type);
1221: BVCreate(child,&matctx->V);
1222: BVSetType(matctx->V,type);
1223: BVSetSizesFromVec(matctx->V,matctx->t,k);
1224: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1225: for (i=0;i<k;i++) {
1226: BVGetColumn(pep->V,i,&v);
1227: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1228: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1229: BVRestoreColumn(pep->V,i,&v);
1230: VecGetArrayRead(matctx->tg,&array);
1231: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1232: BVInsertVec(matctx->V,i,matctx->t);
1233: VecResetArray(matctx->t);
1234: VecRestoreArrayRead(matctx->tg,&array);
1235: }
1237: VecDuplicate(matctx->t,&matctx->Rv);
1238: VecDuplicate(matctx->t,&matctx->Vi);
1239: if (flg) PetscFree(A);
1240: return 0;
1241: }
1243: static PetscErrorCode NRefSubcommDestroy(PEP pep,PEP_REFINE_EXPLICIT *matctx)
1244: {
1245: PetscInt i;
1247: VecScatterDestroy(&matctx->scatter_sub);
1248: for (i=0;i<matctx->subc->n;i++) VecScatterDestroy(&matctx->scatter_id[i]);
1249: for (i=0;i<pep->nmat;i++) MatDestroy(&matctx->A[i]);
1250: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1251: for (i=0;i<matctx->subc->n;i++) VecScatterDestroy(&matctx->scatterp_id[i]);
1252: VecDestroy(&matctx->tp);
1253: VecDestroy(&matctx->tpg);
1254: BVDestroy(&matctx->W);
1255: }
1256: PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1257: BVDestroy(&matctx->V);
1258: VecDestroy(&matctx->t);
1259: VecDestroy(&matctx->tg);
1260: VecDestroy(&matctx->Rv);
1261: VecDestroy(&matctx->Vi);
1262: return 0;
1263: }
1265: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1266: {
1267: PetscScalar *H,*work,*dH,*fH,*dVS;
1268: PetscInt ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1269: PetscBLASInt k_,ld_,*p,info;
1270: BV dV;
1271: PetscBool sinvert,flg;
1272: PEP_REFINE_EXPLICIT *matctx=NULL;
1273: Vec v;
1274: Mat M,P;
1275: PEP_REFINE_MATSHELL *ctx;
1277: PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1279: /* the input tolerance is not being taken into account (by the moment) */
1280: its = *maxits;
1281: PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1282: DSGetLeadingDimension(pep->ds,&ldh);
1283: PetscMalloc1(2*k*k,&dVS);
1284: STGetTransform(pep->st,&flg);
1285: if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1286: PetscBLASIntCast(k,&k_);
1287: PetscBLASIntCast(ldh,&ld_);
1288: PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1289: if (sinvert) {
1290: DSGetArray(pep->ds,DS_MAT_A,&H);
1291: PetscMalloc1(k,&p);
1292: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1293: PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1294: SlepcCheckLapackInfo("getrf",info);
1295: PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1296: SlepcCheckLapackInfo("getri",info);
1297: PetscFPTrapPop();
1298: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1299: pep->ops->backtransform = NULL;
1300: }
1301: if (sigma!=0.0) {
1302: DSGetArray(pep->ds,DS_MAT_A,&H);
1303: for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1304: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1305: pep->ops->backtransform = NULL;
1306: }
1307: }
1308: if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1309: DSGetArray(pep->ds,DS_MAT_A,&H);
1310: for (j=0;j<k;j++) {
1311: for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1312: }
1313: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1314: if (!flg) {
1315: /* Restore original values */
1316: for (i=0;i<pep->nmat;i++) {
1317: pep->pbc[pep->nmat+i] *= pep->sfactor;
1318: pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1319: }
1320: }
1321: }
1322: if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1323: for (i=0;i<k;i++) {
1324: BVGetColumn(pep->V,i,&v);
1325: VecPointwiseMult(v,v,pep->Dr);
1326: BVRestoreColumn(pep->V,i,&v);
1327: }
1328: }
1329: DSGetArray(pep->ds,DS_MAT_A,&H);
1331: NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1332: /* check if H is in Schur form */
1333: for (i=0;i<k-1;i++) {
1334: #if !defined(PETSC_USE_COMPLEX)
1336: #else
1338: #endif
1339: }
1341: BVSetActiveColumns(pep->V,0,k);
1342: BVDuplicateResize(pep->V,k,&dV);
1343: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1344: PetscMalloc1(1,&matctx);
1345: if (nsubc>1) { /* splitting in subcommunicators */
1346: matctx->subc = pep->refinesubc;
1347: NRefSubcommSetup(pep,k,matctx,nsubc);
1348: } else matctx->subc=NULL;
1349: }
1351: /* Loop performing iterative refinements */
1352: for (i=0;i<its;i++) {
1353: /* Pre-compute the polynomial basis evaluated in H */
1354: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1355: PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1356: /* Solve the linear system */
1357: PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1358: /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1359: PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1360: }
1361: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1362: if (!flg && sinvert) PetscFree(p);
1363: PetscFree3(dH,fH,work);
1364: PetscFree(dVS);
1365: BVDestroy(&dV);
1366: switch (pep->scheme) {
1367: case PEP_REFINE_SCHEME_EXPLICIT:
1368: for (i=0;i<2;i++) MatDestroy(&matctx->E[i]);
1369: PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1370: VecDestroy(&matctx->tN);
1371: VecDestroy(&matctx->ttN);
1372: VecDestroy(&matctx->t1);
1373: if (nsubc>1) NRefSubcommDestroy(pep,matctx);
1374: else {
1375: VecDestroy(&matctx->vseq);
1376: VecScatterDestroy(&matctx->scatterctx);
1377: }
1378: PetscFree(matctx);
1379: KSPGetOperators(pep->refineksp,&M,NULL);
1380: MatDestroy(&M);
1381: break;
1382: case PEP_REFINE_SCHEME_MBE:
1383: BVDestroy(&matctx->W);
1384: BVDestroy(&matctx->Wt);
1385: BVDestroy(&matctx->M2);
1386: BVDestroy(&matctx->M3);
1387: MatDestroy(&matctx->M1);
1388: VecDestroy(&matctx->t);
1389: PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1390: if (nsubc>1) NRefSubcommDestroy(pep,matctx);
1391: PetscFree(matctx);
1392: break;
1393: case PEP_REFINE_SCHEME_SCHUR:
1394: KSPGetOperators(pep->refineksp,&M,&P);
1395: MatShellGetContext(M,&ctx);
1396: PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1397: MatDestroy(&ctx->M1);
1398: BVDestroy(&ctx->M2);
1399: BVDestroy(&ctx->M3);
1400: BVDestroy(&ctx->W);
1401: VecDestroy(&ctx->t);
1402: PetscFree(ctx);
1403: MatDestroy(&M);
1404: MatDestroy(&P);
1405: break;
1406: }
1407: PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1408: return 0;
1409: }