#include #include #include #include #include #include "spectrum.h" #include "improved.h" /* needed for print_interaction_memory(fp); */ #include "op.h" /* elop and els are declared here */ #ifdef USE_MPI #include "usempi.h" #endif /* Given a basis, find the spectrum using dnlaso to diagonalize the Hamiltonian. Thus one simply uses the same basis twice for zero transverse momentum. vecin retruns a pointer to a list of the associated eigenvectors. This routine does not call makephaselist() or maketenslist()! These must be called, when appropriate, before calling spectrum. The function spectrum() calls various FORTRAN routines. The routines are written in either F77 or various F90 extensions recognized by the g77 compiler. There is a rather serious problem with g77: although it allows for dynamic memory allocation, all the arrays are put on the stack, making for problems with stack overflow (why didn't they just use malloc??). This causes the code to crash with a "segmentation fault" error. See: http://gcc.gnu.org/onlinedocs/g77/Stack-Overflow.html There are two possibilities: 1. In the shell, run "limit stacksize unlimited" 2. Allocate memory in the "C" code and pass to Fortran. Both strategies are persued. */ #define PRINTIT 0 /*Print values, method, time, et cetera in spectrum(...)*/ #define USE_ARPACK 0 /* use ARPACK */ #define USE_LANDR3 1 /* use LANDR3 routines */ #define USE_LANDR2 2 /* use LANDR2 routines (no eigenvectors) */ #define USE_DNLASO 3 /* use DNLASO */ #define LANCZOS 1 /* choose the appropriate option */ /*************************************************************************/ /* initializes the parameters in s. If o == 0, no orientation reversal symmetry o == 1, even symmetry o ==-1, odd symmetry Likewise for charge conjugation. Does not initialize angle or ht if not needed. kt is twice the discretized momentum. */ void initspectrum(specify_calculation *s, enum state_type type, int *ht, int nptrunc, unsigned int kt, int charge, int o, int multi, integer momflag, element *angle, integer nval, int vectorflag){ int i; s->type=type; if(type==TYPE_GLUE || type==TYPE_SOURCE) for(i=0; iht[i]=ht[i]; s->kt=kt; s->nptrunc=nptrunc; s->charge=charge; s->o=o; s->multi=multi; s->momflag=momflag; if(type==TYPE_SOURCE || momflag) for(i=0; iangle[i]=angle[i]; s->nval=nval; s->vectorflag=vectorflag; /* global_kt is global variable in improved.h */ if(kt>global_kt)global_kt=kt; } /* initializes the basis parameters in x. */ void specifytobasis(specify_calculation *s, full_basis *x){ int i; if(x->length>0)free_basis(x); x->type=s->type; for(i=0; iht[i]=s->ht[i]; x->nptrunc=s->nptrunc; x->kt=s->kt; x->o=s->o; x->charge=s->charge; x->multi=s->multi; } #ifdef USE_MPI void FindChunk(integer *Size, size_t totaln) { size_mem tempsize; int group_size; int PeID; MPI_Comm_size(mpi_group, &group_size); MPI_Comm_rank(mpi_group,&PeID); *Size = (totaln * (PeID + 1))/group_size - (totaln * PeID)/group_size; MPI_Reduce(Size, &tempsize, 1, MPI_SIZE_T, MPI_SUM, 0, mpi_group); #if 0 /* debugging print */ printf("******FindChunk*** myrank=%i mysize=%li totalsize=%i\n", PeID,*Size,totaln); #endif if(PeID == 0 && totaln != tempsize) fprintf(stderr,__FILE__": The tempsizes don't add up! tempsize=" SIZE_MEM_PRINT " n=" SIZE_T_PRINT "\n",tempsize,els.n); } #endif /* remember largest number of nonzero matrix elements and maximum memory use. In the mpi case, find the group aggregate use. */ size_mem max_use(elslist *els){ size_mem memory=els->rc_length+els->i_length+els->j_length,total_memory; size_mem my_nonzero=els->j[els->n]; size_mem total_nonzero; #ifdef USE_MPI int my_rank; MPI_Comm_rank(mpi_group,&my_rank); MPI_Reduce(&memory,&total_memory,1,MPI_SIZE_T,MPI_SUM,0,mpi_group); MPI_Reduce(&my_nonzero,&total_nonzero,1,MPI_SIZE_T,MPI_SUM,0,mpi_group); #else int my_rank=0; total_nonzero=my_nonzero; total_memory=memory; #endif if(my_rank==0 && max_els.memoryn; max_els.nonzero = total_nonzero; } return total_nonzero; } /* this routine does all the important stuff WARNING, spectrum() may use memory past the end of the returned eigenvalues and eigenvectors. In any case, it may realloc() the memory. */ void spectrum( specify_calculation *s, /* calculation specification, filled by initspectrum(...) */ element *couplings, /* coupling constants */ eigensystem *eigen, /* the results */ full_basis *basis /* pointer to the basis created */ ){ int i; element beta; integer max_op=150; /* maximum number of matrix-vector multiplies */ integer n, ierr=0, nperm=0; doublereal *work=NULL; doublecomplex *cwork=NULL; size_t temp_length; size_mem total_nonzero; /* total number of nonzero matrix elements */ integer tryval=s->nval; /* number of eigenvalues to find */ /* Timing information */ int j; clock_t t1; time_t t2,tf; int print_info=PRINTIT; /* flag for printing run-time information */ #ifdef USE_MPI int my_rank=0; MPI_Comm_rank(mpi_group,&my_rank); print_info=(print_info&&(my_rank==0)); #elif defined(IBM_SP2) int my_rank=0; #endif t1=clock(); time(&t2); if(print_info){ printf(" %s spectrum: h=(",state_type_string(s->type)); for(j=0; jht[j]); printf("), %i-particles, K=%i/2, charge=%i, o=%i, multi=%i", s->nptrunc,s->kt,s->charge,s->o,s->multi); if(s->momflag){ if(s->type==TYPE_SOURCE) printf("\n (%.12g %.12g)",s->angle[HINDEX],s->angle[HINDEX+1]); else printf("\n (%.12g %.12g)",s->angle[0],s->angle[1]); } printf("\n"); } /* make basis, etc eventually, init_params should take over the job of the various initialization routines that follow. */ init_params(s->nptrunc,s->kt,s->momflag,s->angle); beta=mapcouplings(couplings,s->kt); specifytobasis(s,basis); makebasis(basis); if(s->momflag){ if(s->type==TYPE_SOURCE){ /*bvds July 27, 2000 typo fixed */ maketenslist(s->angle[HINDEX+1],s->kt,s->angle[HINDEX],beta); } else { makephaselist(s->angle,s->kt); } } time(&tf); if(print_info){ printf(" %.2f sec (%li wall) to construct basis, %i states\n", (clock()-t1)/(float) CLOCKS_PER_SEC,tf-t2,basis->length); #if 0 /* print out memory usage */ printf(__FILE__":%i memory usage\n",__LINE__); print_memory_usage(stdout,basis); #endif } t1=clock(); time(&t2); /* Choose Hamiltonian. If the two bases are equal then use real valued, else use complex case (nonzero P_1). Nonzero string tension must always be complex valued. */ if(s->type == TYPE_MESON || s->type == TYPE_SOURCE || s->momflag){ if(print_info)printf(" Use hamelsc()"); hamelsc(&els,basis,couplings); }else{ if(print_info)printf(" Use hamels()"); hamels(&els,basis,couplings); } time(&tf); if(print_info){ printf(" in %.2f sec (%li wall)",(clock()-t1)/(float) CLOCKS_PER_SEC, tf-t2); } /* handle error in making Hamiltonian */ if(els.j==NULL){ if(print_info)printf("\n"); fprintf(stderr,__FILE__": error making Hamiltonian\n"); goto error; } total_nonzero=max_use(&els); /* determine memory usage of els */ if(print_info){ printf(", " SIZE_MEM_PRINT " nonzero\n",total_nonzero); #if 0 /* print out memory usage */ printf(__FILE__":%i memory usage\n",__LINE__); print_memory_usage(stdout,basis); #endif } #if 0 /* print out matrix elements of the Hamiltonian */ print_els(&els,10); #endif #if 0 /* print out the length of the longest row */ for(i=0, j=0; ij) j=els.j[i+1]-els.j[i]; printf("longest row is %i\n",j); #endif if(els.n==0){ fprintf(stderr,__FILE__": Matrix of size zero in spectrum().\n"); goto error; } if(els.j==NULL){ fprintf(stderr,__FILE__": spectrum() detecting an error in hamels or hamelsc\n"); goto error; } /* In g77, dynamic allocation is put on the stack. Increase the stack limit to infinity */ #ifdef __GNUC__ /* assume that g77 is being used */ { #include struct rlimit rlim; getrlimit (RLIMIT_STACK, &rlim); rlim.rlim_cur = RLIM_INFINITY; /* could use rlim.rlim_max */ if(setrlimit (RLIMIT_STACK, &rlim)){ fprintf(stderr,__FILE__": Error in increasing stack to infinity\n"); goto error; } } #endif /* If there are degenerate eigenvalues, LANSO has trouble finding the second eigenvalue. The correct solution to this problem is to use a block lanczos alogrithm. A less elegant solution is to increase the number of ritz pairs sought. On the transverse lattice, the only exact degeneracies are for 3+1 dimensions, J_z=1 states at zero transverse momentum. Generally, one uses the lattice symmetries and no degeneracies occur. */ #if LANCZOS!=USE_DNLASO /* this is not a problem for block lanczos */ if(tryval>1 && HINDEX==2 && (s->multi==0 || s->multi==2)){ tryval+=20; /* number suggested bye the LANSO README */ if(tryval>els.n)tryval=els.n; } #endif /* Adjust max_op to something reasonable */ #if 0 /* Maxop is generally limited by available memory */ if(max_op<2*tryval)max_op=2*tryval; #endif if (els.nvectors.r == NULL){ eigen->vectors.r = (doublereal *) eigen->vectors.c; eigen->vectors.c= NULL; } #if LANCZOS==USE_DNLASO /*********************************************************************/ { integer bet; integer nblock=3 /*2 Krylov block size */; integer maxop=max_op/nblock; /* number of block iteractions */ integer maxj=maxop*nblock; /* memory set aside for qstore */ if(n>6*nblock && n>=maxj){ /* maxj determines how much memory dnlaso uses. The minimum value is 6*nblock and is faster if larger. The limiting factor should be the amount of memory available. */ integer negval,*ind=NULL,post; integer nfig=16; /* 16 digits of precision (Wu: "there is no point in having less than full precision since the convergence is quadratic once one is close to the right values." */ temp_length=n*tryval*sizeof(*eigen->vectors.r); if(temp_length > eigen->vectors.length){ eigen->vectors.r=realloc(eigen->vectors.r, eigen->vectors.length=temp_length); if(NULL==eigen->vectors.r){ fprintf(stderr,__FILE__": eigenvectors not allocated\n"); goto error; } } temp_length=4*tryval*sizeof(*eigen->values); if(temp_length > eigen->values_length){ if(NULL==(eigen->values=realloc(eigen->values, eigen->values_length=temp_length))){ fprintf(stderr,__FILE__": eigenvalues not allocated\n"); goto error; } } if(NULL==(ind=malloc(tryval*sizeof(integer)))){ fprintf(stderr,__FILE__": ind not allocated\n"); goto error; } if(nblock==1 && tryval>1) fprintf(stderr,__FILE__": nblock=%li is too small\n",nblock); bet = nblock + 2*tryval+6 + maxj*(2*nblock+1) + maxj + (2*nblock+1)*(nblock+1); if(n*nblock>bet)bet=n*nblock; bet += 2*n*nblock + 3*tryval + maxj*(nblock+1) + 2*nblock*nblock + maxj*(tryval+1); /* calculate memory usage of the dnlaso post-processor, if larger than bet, write error. in dnlaso, make sure that nperm is always less than tryval. */ post = 2*tryval*tryval; if(n*nblock>post)post=n*nblock; post += 2*n*nblock+1 + 2*tryval*tryval + 2*tryval+4 + (2*nblock+1)*(nblock+1); if(post>bet){ fprintf(stderr,__FILE__": post larger than bet in spectrum\n"); goto error; } if(NULL==(work=malloc(bet*sizeof(doublereal)))){ fprintf(stderr,__FILE__": work not allocated\n"); goto error; } for(i=0; ivalues, &n, eigen->vectors.r, &nblock, &maxop, &maxj, work, ind, &ierr); time(&tf); if(print_info){ printf(" Lanczos with DNLASO in %.2f seconds (%li wall), " "%li*%li multiplies\n", (clock()-t1)/(float) CLOCKS_PER_SEC,tf-t2,ind[0],nblock); } free(ind); if(ierr!=0){ fprintf(stderr,__FILE__": DNLASO returning error = %li\n",ierr); goto error; } } else { /* If the matrix is too small for LASO routines, set up memory allocation for LAPACK routine DSYEV. In this case we have to "unpack" the lower triangle of the matrix from els and store it in vec. Here we put the "upper triangle" of the matrix into eigenvectors. Hence, uplo='u' */ char jobz='v',uplo='u'; temp_length=n*n*sizeof(*eigen->vectors.r); if(temp_length > eigen->vectors.length){ eigen->vectors.r=realloc(eigen->vectors.r, eigen->vectors.length=temp_length); if(NULL==eigen->vectors.r){ fprintf(stderr,__FILE__": eigen->vectors.r not allocated\n"); goto error; } } temp_length=n*sizeof(*eigen->values); if(temp_length > eigen->values_length){ if(NULL==(eigen->values=realloc(eigen->values, eigen->values_length=temp_length))){ fprintf(stderr,__FILE__": eigenvalues not allocated\n"); goto error; } } if(NULL==(work=malloc((bet=3*n+10)*sizeof(doublereal)))){ fprintf(stderr,__FILE__": els.i not allocated\n"); goto error; } for(i=0; ivectors.r[i]=0.0; /* The following is adapted from the subroutine elop listed below. */ for(i=0, j=els.j[0]; jvectors.r[els.i[j]*n+i]=els.r[j]; #if 0 /* Print reconstructed matrix for debugging. */ for(i=0; ivectors.r[i*n+j]); printf("\n"); } #endif /* LAPACK routine dsyev to give eigenvalues and optionally eigenvectors of a symmetric matrix. The eigenvectors are returned in eigenvectors. */ DSYEV(&jobz,&uplo,&n,eigen->vectors.r,&n,eigen->values,work,&bet, &ierr,1,1); #if PRINTIT printf("LAPACK routine DYSEV, ierr=%li\n",ierr); #endif if(ierr!=0) fprintf(stderr,__FILE__": DSYEV returning error = %li\n",ierr); nperm=tryval; } } #elif LANCZOS==USE_ARPACK /*************** version which uses ARPACK ********/ { integer bet; integer ncv=3*tryval; /* number of Arnoldi vectors */ char sa[]={'S','A'},all='A'; integer iparam[11]={1,0,1+(max_op/ncv),1,0,0,1,0},ipntr[11]; logical rvec=s->vectorflag,*select=NULL; doublereal *dummy=NULL,*workd,*workl; doublereal *qstore; /* the lanczos basis vectors */ doublereal tol=0.0; /* minimum tol */ char jobz[]="I"; /* standard eigenvalue problem */ integer ido=0; /* first call to DSAUPD */ integer lworkl=ncv*(ncv+8); temp_length=n*tryval*sizeof(*eigen->vectors.r); if(temp_length > eigen->vectors.length){ eigen->vectors.r=realloc(eigen->vectors.r, eigen->vectors.length=temp_length); if(NULL==eigen->vectors.r){ fprintf(stderr,__FILE__": eigenvectors not allocated\n"); goto error; } } temp_length=n*sizeof(*eigen->values); if(temp_length > eigen->values_length){ if(NULL==(eigen->values=realloc(eigen->values, eigen->values_length=temp_length))){ fprintf(stderr,__FILE__": eigenvalues not allocated\n"); goto error; } } select=malloc(ncv*sizeof(logical)); bet=4*n+lworkl; if(NULL==(work=malloc(bet*sizeof(doublereal)))){ fprintf(stderr,__FILE__": work not allocated\n"); goto error; } /* this is the main memory user */ if(NULL==(qstore=malloc(n*ncv*sizeof(doublereal)))){ fprintf(stderr,__FILE__": qstore not allocated\n"); goto error; } workd=work+n; workl=workd+3*n; ierr=0; /* use random initial vector */ for(;;){ DSAUPD(&ido, jobz, &n, sa, &tryval, &tol, work, &ncv, qstore, &n, iparam, ipntr, workd, workl, &lworkl, &ierr,1,2); #if 0 /*debugging print */ printf("in loop, ido=%li ierr=%li,iparam[4]=%li\n", ido,ierr,iparam[4]); printf("ipntr= %li %li, n=%li, ncv=%li\n",ipntr[0],ipntr[1],n,ncv); #endif if(ido==1 || ido==-1) /* remember FORTRAN uses an offset of 1 */ OP(&n,NULL,workd+ipntr[0]-1,workd+ipntr[1]-1); else break; } if(ierr<0){ fprintf(stderr,__FILE__": ARPACK DSAUPD returning error = %li\n",ierr); goto error; } /* call dseupd ( rvec, 'All', select, d, v, ldv, sigma, & bmat, n, which, nev, tol, work, ncv, v, ldv, & iparam, ipntr, workd,workl, &lworkl, ierr ) */ DSEUPD(&rvec, &all, select, eigen->values, eigen->vectors.r, &n, dummy, jobz, &n, sa, &tryval, &tol, work, &ncv, qstore, &n, iparam, ipntr, workd, workl, &lworkl, &ierr,1,1,2); time(&tf); if(print_info){ printf(" Lanczos with ARPACK in %.2f seconds (%li wall), " "%li multiplies, %li interations\n", (clock()-t1)/(float) CLOCKS_PER_SEC,tf-t2,iparam[8],iparam[2]); } free(select); free(qstore); if(ierr!=0){ fprintf(stderr,__FILE__": ARPACK DSEUPD returning error = %li\n",ierr); goto error; } } #elif LANCZOS==USE_LANDR2 /************** choose LANDR2 ****************/ { integer lanmax=max_op; /* maximum lancsos steps */ doublereal *Bnd = NULL; integer lohi = -1; /* lowest eigenvalues */ integer Msglvl = 0; /* message level for landr2 (0 to 20) */ doublereal CondM = 1.0; doublereal Kappa = 0.0; /* desired precision (zero gives best possible) */ integer J = 0; /*outputs j is number of lanzcos steps taken, neig is number of eigenvalues converged */ if(s->vectorflag){ fprintf(stderr,__FILE__": PLANDR2 does not find eigenvectors!\n"); exit(56); } temp_length=lanmax*sizeof(*eigen->values); if(temp_length > eigen->values_length){ if(NULL==(eigen->values=realloc(eigen->values, eigen->values_length=temp_length))){ fprintf(stderr,__FILE__": eigenvalues not allocated\n"); goto error; } } /* if lanmax is changed then this is all kinds of trouble */ if (NULL==(Bnd = malloc(lanmax * sizeof(*Bnd)))){ fprintf(stderr, "Bnd not allocated\n"); goto error; } /* this is the main memory user */ if(NULL==(work=malloc(n*lanmax*sizeof(doublereal)))){ fprintf(stderr,__FILE__": work not allocated\n"); goto error; } for(i=0; ivalues, Bnd, work, &ierr, &Msglvl); time(&tf); if(print_info){ printf(" Lanczos with LANDR2 in %.2f seconds (%li wall), " "%li multiplies\n", (clock()-t1)/(float) CLOCKS_PER_SEC,tf-t2,J); } free(Bnd); if(ierr!=0){ fprintf(stderr, "PLANDR2 returning error = %li\n", ierr); goto error; } } #elif LANCZOS==USE_LANDR3 /************** choose LANDR3 ****************/ { integer lanmax=max_op; /* maximum lancsos steps */ doublereal *Bnd = NULL; integer lohi = -1; /* lowest eigenvalues */ integer Msglvl = 0; /* message level for lanso2 (0 to 20) */ doublereal CondM = 1.0; doublereal Kappa = 0.0; /* Gives desired precision (zero gives best possible) */ integer J = 0; /*outputs j is number of lanzcos steps taken, neig is number of eigenvalues converged */ temp_length=n*tryval*sizeof(*eigen->vectors.r); if(temp_length > eigen->vectors.length){ eigen->vectors.r=realloc(eigen->vectors.r, eigen->vectors.length=temp_length); if(NULL==eigen->vectors.r){ fprintf(stderr,__FILE__": eigenvectors not allocated\n"); goto error; } } temp_length=lanmax*sizeof(*eigen->values); if(temp_length > eigen->values_length){ if(NULL==(eigen->values=realloc(eigen->values, eigen->values_length=temp_length))){ fprintf(stderr,__FILE__": eigenvalues not allocated\n"); goto error; } } /* if lanmax is changed then this is all kinds of trouble */ if (NULL==(Bnd = malloc(lanmax * sizeof(*Bnd)))){ fprintf(stderr, "Bnd not allocated\n"); goto error; } /* this is the main memory user */ if(NULL==(work=malloc(n*lanmax*sizeof(doublereal)))){ fprintf(stderr,__FILE__": work not allocated\n"); goto error; } for(i=0; ivalues, Bnd, &(s->vectorflag), eigen->vectors.r, work, &ierr, &Msglvl, &mpi_group); #else LANDR3(&n, &lanmax, &tryval, &lohi, &CondM, &Kappa, &J, &nperm, eigen->values, Bnd, &(s->vectorflag), eigen->vectors.r, work, &ierr, &Msglvl); #endif time(&tf); if(print_info){ printf(" Lanczos with LANDR3 in %.2f seconds (%li wall), " "%li multiplies\n", (clock()-t1)/(float) CLOCKS_PER_SEC,tf-t2,J); } free(Bnd); if(ierr!=0){ fprintf(stderr, "LANDR3 returning error = %li\n", ierr); goto error; } } #else fprintf(stderr,__FILE__": No appropriate diagonalization chosen\n"); goto error; #endif /***********************complex case*******************************/ } else if(els.c != NULL) { /* switch the memory pointers around */ if(eigen->vectors.c==NULL){ eigen->vectors.c = (doublecomplex *) eigen->vectors.r; eigen->vectors.r= NULL; } /************** ZLANDR3 ****************/ #if LANCZOS==USE_LANDR3 { integer lanmax=max_op; /* maximum lancsos steps */ doublereal *Bnd = NULL; integer lohi = -1; /* lowest eigenvalues */ integer Msglvl = 0; /* message level for zlanso2 (0 to 20) */ doublereal CondM = 1.0; doublereal Kappa = 0.0; /* Desired precision (zero is best possible) */ integer J = 0; /* outputs j is number of lanzcos steps taken, neig is number of eigenvalues converged */ temp_length=n*tryval*sizeof(*eigen->vectors.c); if(s->vectorflag && temp_length > eigen->vectors.length){ eigen->vectors.c=realloc(eigen->vectors.c, eigen->vectors.length=temp_length); if(NULL==eigen->vectors.c){ fprintf(stderr,__FILE__": eigenvectors not allocated\n"); goto error; } } temp_length=lanmax*sizeof(*eigen->values); if(temp_length > eigen->values_length){ if(NULL==(eigen->values=realloc(eigen->values, eigen->values_length=temp_length))){ fprintf(stderr,__FILE__": eigenvalues not allocated\n"); goto error; } } /* if lanmax is changed then this is all kinds of trouble */ if (NULL==(Bnd = malloc(lanmax * sizeof(*Bnd)))){ fprintf(stderr, "Bnd not allocated\n"); goto error; } /* this is the main memory user */ if(NULL==(cwork=malloc(n*lanmax*sizeof(*cwork)))){ fprintf(stderr,__FILE__": work not allocated\n"); goto error; } for(i=0; ivalues, Bnd, &(s->vectorflag), eigen->vectors.c, cwork, &ierr, &Msglvl, &mpi_group); #else ZLANDR3(&n, &lanmax, &tryval, &lohi, &CondM, &Kappa, &J, &nperm, eigen->values, Bnd, &(s->vectorflag), eigen->vectors.c, cwork, &ierr, &Msglvl); #endif time(&tf); if(print_info){ printf(" Lanczos with ZLANDR3 in %.2f seconds (%li wall), " "%li multiplies\n", (clock()-t1)/(float) CLOCKS_PER_SEC,tf-t2,J); } free(Bnd); if(ierr!=0){ fprintf(stderr, "ZLANDR3 returning error = %li\n", ierr); goto error; } } /*********************************************************************/ #elif LANCZOS==USE_ARPACK { integer bet; integer ncv=3*tryval+1; /* number of Arnoldi basis vectors */ char sr[]={'S','R'}; /* smallest real part */ char all[]={'A'}; integer iparam[11]={1,0,max_op/ncv,1,0,0,1,0},ipntr[14]; logical rvec=s->vectorflag,*select=NULL; doublecomplex *dummy=NULL; integer ido=0; /* first call to DSAUPD */ doublecomplex *qstore; /* the lanczos basis vectors */ doublecomplex *workd,*workl,*workev; doublecomplex *unreal; /* the eigenvalues */ doublereal tol=0.0; /* minimum tol */ char jobz[]={'I'}; /* standard eigenvalue problem */ integer lworkl=ncv*(3*ncv+5); ierr=0; /* use random initial vector */ temp_length=n*tryval*sizeof(*eigen->vectors.c); if(temp_length > eigen->vectors.length){ eigen->vectors.c=realloc(eigen->vectors.c, eigen->vectors.length=temp_length); if(NULL==eigen->vectors.c){ fprintf(stderr,__FILE__": eigenvectors not allocated\n"); goto error; } } temp_length=n*sizeof(*eigen->values); if(temp_length > eigen->values_length){ if(NULL==(eigen->values=realloc(eigen->values, eigen->values_length=temp_length))){ fprintf(stderr,__FILE__": eigenvalues not allocated\n"); goto error; } } select=malloc(ncv*sizeof(logical)); bet=4*n+lworkl+3*ncv; if(NULL==(cwork=malloc(bet*sizeof(doublecomplex)))){ fprintf(stderr,__FILE__": cwork not allocated\n"); goto error; } if(NULL==(work=malloc(ncv*sizeof(doublereal)))){ fprintf(stderr,__FILE__": work not allocated\n"); goto error; } /* this is the main memory user */ if(NULL==(qstore=malloc(n*ncv*sizeof(doublecomplex)))){ fprintf(stderr,__FILE__": qstore not allocated\n"); goto error; } if(NULL==(unreal=malloc((tryval+1)*sizeof(doublecomplex)))){ fprintf(stderr,__FILE__": unreal not allocated\n"); goto error; } workev=cwork+n; workd=workev+3*ncv; workl=cwork+4*n+3*ncv; for(;;){ /* c call znaupd c ( IDO, BMAT, N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, c IPNTR, WORKD, WORKL, LWORKL, RWORK, INFO ) */ ZNAUPD(&ido, jobz, &n, sr, &tryval, &tol, cwork, &ncv, qstore, &n, iparam, ipntr, workd, workl, &lworkl, work, &ierr,1,2); #if 0 /*debugging print */ printf("in loop, ido=%li ierr=%li,iparam[4]=%li\n", ido,ierr,iparam[4]); printf("ipntr= %li %li, n=%li, ncv=%li\n",ipntr[0],ipntr[1],n,ncv); #endif if(ido==1 || ido==-1) /* remember FORTRAN uses an offset of 1 */ ZOP(&n,NULL,workd+ipntr[0]-1,workd+ipntr[1]-1); else break; } if(ierr<0){ fprintf(stderr,__FILE__": ARPACK DSAUPD returning error = %li\n",ierr); goto error; } /* c call zneupd c ( RVEC, HOWMNY, SELECT, D, Z, LDZ, SIGMA, WORKEV, BMAT, c N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, c WORKL, LWORKL, RWORK, INFO ) */ ZNEUPD(&rvec, all, select, unreal, eigen->vectors.c, &n, dummy, workev, jobz, &n, sr, &tryval, &tol, cwork, &ncv, qstore, &n, iparam, ipntr, workd, workl, &lworkl, work, &ierr,1,1,2); time(&tf); if(print_info){ printf(" Lanczos with ARPACK in %.2f seconds " "(%li wall), %li multiplies, %li updates\n", (clock()-t1)/(float) CLOCKS_PER_SEC,tf-t2,iparam[8],iparam[2]); } for(i=0; ivalues[i]=unreal[i].re; if(fabs(unreal[i].im)>1.0e-10*fabs(unreal[i].re)){ fprintf(stderr,"Eigenvalue %i is returning %g%+g*i\n", i,unreal[i].re,unreal[i].im); goto error; } } free(select); free(qstore); free(unreal); if(ierr!=0){ fprintf(stderr,__FILE__": ARPACK ZNEUPD returning error = %li\n",ierr); goto error; } } #else fprintf(stderr,__FILE__": No appropriate diagonalization chosen\n"); goto error; #endif /************ choose diagonalization method *****************/ /********************************************************************/ } else { fprintf(stderr,__FILE__": Very bad error\n"); goto error; } if(print_info){ printf(" "); for(j=0, i=3; jvalues[j]); if((i++)%6==0 && j+1vectors.r[i+j*n]); printf("\n"); } #endif goto end; /* Yes, there are rare occasions where a goto is OK.... */ error: fprintf(stderr,__FILE__": Error in spectrum().\n"); #ifdef IBM_SP2 if(my_rank%8==0) #endif print_memory_usage(stderr,basis); free_eigensystem(eigen); end: free(work); free(cwork); return; } /* subroutine to print out current (local) memory use */ #define PRINT_PS 0 /* print out the results of "ps ux" */ void print_memory_usage(FILE *fp, full_basis *x){ #if PRINT_PS FILE *pipe; #define LINE_SIZE 256 char s[LINE_SIZE]; #endif fprintf(fp,"MM local memory usage: els.r/els.c " SIZE_T_PRINT ", els.i " SIZE_T_PRINT ",\n", els.rc_length,els.i_length); fprintf(fp,"MM els.j " SIZE_T_PRINT " bytes.\n",els.j_length); fprintf(fp,"MM reduced basis " SIZE_MEM_PRINT " bytes, ", reduced_basis_memory); fprintf(fp,"full basis " SIZE_T_PRINT " bytes\n", (sizeof(partype)*x->nptrunc+sizeof(*x->np)+sizeof(*x->inner))* x->length); print_interaction_memory(fp); print_source_interaction_memory(fp); #if PRINT_PS pipe=popen("ps ux","r"); while(fgets(s,LINE_SIZE,pipe)!=NULL)fprintf(fp,"MM %s",s); pclose(pipe); fprintf(fp,"\n"); #endif } /* free() all memory used in spectrum On the CRAY, the various quantities should be deallocated in reverse order of allocation. This is only of use for reducing memory fragmentation if freemahk() is also called. This is because resthk() calls mahk(). */ /* free memory used by an eigensystem calculation */ void free_eigensystem(eigensystem *eigen){ free(eigen->values); eigen->values=NULL; eigen->values_length=0; free(eigen->vectors.r); free(eigen->vectors.c); eigen->vectors.r=NULL; eigen->vectors.c=NULL; eigen->vectors.length=0; } /* Free memory used to construct the hamiltonian matrix. This is mainly for debugging purposes: none of this memory is "lost" if a new Hamiltonian is calculated. */ void free_spectrum(void){ free_els(&els); /* free structure holding the Hamiltonian matrix */ free_improved(); /* free memory used for matrix elements */ free_source(); /* Free memory used by the heavy source interactions */ free_reduced(); /* free memory used to store bases */ } element fdot(const element *x,const element *y,int n){ int i; element t=0.0; for(i=0; i