#include #include #include #include #include #include "include.h" #include "run.h" #if defined(USE_MPI) #include "usempi.h" #endif #if defined(USE_MPI) && !(ONE_PROCESSOR_SPECTRUM) #define PDEBUGGER 0 /* debugging macro for OP */ #define COU 0 /* wait for COU calls before printing */ #define PRINTIT 0 /*Print values, method, time, et cetera in spectrum(...)*/ #define MYPRINT 0 /*Print various things associated with mpi */ #define DOP2 1 /*Using plandr2 if 0 does plandr*/ #define FINDVEC 1 /*Finds eigenvectors if 1, only used with plandr*/ /* MAX_ELS is a guess for the maximum number of nonzero matrix elements of the the Hamiltonian. If we define this carefully, memory fragmentation on the heap can be minimized. */ #ifndef MAX_ELS #define MAX_ELS 100000 #endif #ifndef MAX_WORK #define MAX_WORK 1000000 #endif /*************************************************************************/ /* list of nonzero matrix elements used by elop and spectrum. */ static elslist els={NULL,NULL,NULL,0,0,0}; static doublereal *spectrumval=NULL, *spectrumvec=NULL; static size_t lval=0, lvec=0; /* arrays used by OP defined in PLANDR2 */ static doublereal *given_vector=NULL, *tempsum=NULL; static int *recvsendarray=NULL, *displs=NULL; static doublereal *work=NULL; static size_t maxwork=0; void spectrum2(sectors *s, /* basis, filled by initbasis(...) */ sectors *s2, /* second basis for nonzero momentum */ doublereal **vals, /* list of eigenvalues */ doublereal **vecin, /* list of eigenvectors */ integer nval, /* number of eigenvalues */ element *couplings, /* coupling constants */ element *angle, /* angles and momenta, length HINDEX+2 */ int globalkt, /* maximum value of kt, for mapcouplings */ integer nfig /* number of significant digits in answer */ ) { int i,j; element beta; integer n,ierr=0, nperm=0,bet; doublereal *work=NULL; doublereal *Bnd = NULL; /* maxj determines how much memory dnlaso uses. The minimum value is 6*nblock and is faster if larger. The limiting factor should be the amount of memory available. */ integer maxj, maxmaxj = 300; doublereal temp; clock_t t1; /*PLANDR2 variables*/ /*G&L variables*/ integer localn; #if DOP2 integer lohi = -1; #else doublereal endl=-1*pow(10.0,nfig), endr=0.0; integer ev; #endif doublereal Kappa = pow(10.0,-nfig); /* Gives desired precision */ integer J = 0; /*outputs j is number of lanzcos steps taken, neig is number of eigenvalues converged*/ /*Lanczos block is always 1*/ integer Msglvl = 0; /* message level for Plandr2 (0 to 20)*/ doublereal CondM = 1.0; integer mpicomm=MPI_COMM_WORLD; /*MPI variables*/ int my_id, comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_id); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); t1=clock(); #if MYPRINT if(my_id==MASTER) printf("processor %d Entering spectrum\n\n",my_id); #endif #if PRINTIT if (my_id == MASTER) { printf(" Eigenvalues, %li digits, globalkt=%i\n", nfig,globalkt); printf(" K=%i/2, %i-particles, o=%i, multi=%i, multi2=%i (%g %g);\n", s->kt,s->npmax,s->o,s->multi,s2->multi,angle[0],angle[1]); } #endif /* make basis, etc It is more efficient if mapcouplings is called with the globally largest value of kt so that the improved matrix elements are not recalculated. */ beta=mapcouplings(globalkt,couplings); #if MYPRINT printf("\nAfter mapcouplings stuff\n"); #endif makebasis(s); #if MYPRINT printf("\nAfter makebasis\n"); #endif if(s->o != s2->o || s->multi != s2->multi){ if(s->cyclic && s->kt>0){ rebasis(s,angle); makebasis(s2); rebasis(s2,angle); makephaselist(angle,s->kt); } else { makebasis(s2); maketenslist(angle[HINDEX+1],s->kt,angle[HINDEX],beta); } } #if MYPRINT printf("\nAfter more makebasis stuff\n"); #endif /* set aside initial guess for memory for nonzero matrix elements */ if(els.tchunko==s->o && s2->multi==s->multi) { #if PRINTIT if(my_id == MASTER) { printf(" using hamels"); fflush(stdout); } #endif hamels(&els,s,couplings); } else { #if PRINTIT if(my_id == MASTER) { printf(" using hamelsc"); fflush(stdout); } #endif hamelsc(&els,s,s2,couplings); } n=els.n; /* Make sure that maxj is never larger than els.n (error 32 in plandr2) */ if (n>maxmaxj) maxj = maxmaxj; else maxj = n; if(els.j==NULL) { fprintf(stderr,"spectrum() detecting an error in hamels or hamelsc\n"); goto error; } /* remember largest number of nonzero matrix elements. */ if(max_elsj) j=els.j[i+1]-els.j[i]; printf("longest row is %i\n",j); #endif if(n>lval){ if(NULL==(spectrumval=realloc(spectrumval,(lval=n)*sizeof(doublereal)))) { fprintf(stderr,"spectrumval not allocated\n"); goto error; } if(NULL==(given_vector = realloc(given_vector, lval * sizeof(doublereal)))) { fprintf(stderr, "given_vector not allocated\n"); goto error; } if(NULL==(tempsum = realloc(tempsum, lval * sizeof(doublereal)))) { fprintf(stderr, "tempsum not allocated\n"); goto error; } } /* Set up memory allocation for plandr2 and OP The pointers work,val and spectrumvec are defined globally. */ #if 0 /* spectrumvec is not defined */ if(lvec (maxj + 1)) bet = 5 * localn + 4 * maxj + localn + 1 + (maxj * maxj); else bet = 5 * localn + 4 * maxj + maxj + 2 + (maxj * maxj); #else if(localn > (maxj + 1)) bet = 5 * localn + 4 * maxj + localn + 1; else bet = 5 * localn + 4 * maxj + maxj + 2; #endif #if 0 work = malloc(bet * sizeof(doublereal)); /*end of work stuff*/ #endif if(bet>maxwork) { maxwork=(bet>MAX_WORK)?bet:MAX_WORK; work=realloc(work,maxwork*sizeof(doublereal)); } /* if maxj is changed then the following statement could get us in all kinds of trouble */ if (NULL==(Bnd = malloc(maxj * sizeof(doublereal)))) { fprintf(stderr, "Bnd not allocated\n"); goto error; } for (i=0; i < bet; i++)work[i] = 0.0; t1=clock(); #if MYPRINT printf("\nCalling Plandr2"); #endif #if DOP2 PLANDR2(&localn, &maxj, &nval, &lohi, &CondM, &Kappa, &J, &nperm, spectrumval, Bnd, work, &bet, &ierr, &Msglvl, &mpicomm); #else PLANDR(&localn, &maxj, &nval, &CondM, &endl, &endr, &ev, &Kappa, &J, &nperm, spectrumval, Bnd, work, &ber, &ierr, &Msglvl, &mpicomm); #endif #if MYPRINT printf("\nReturning from Plandr2\n"); #endif #if PRINTIT if(my_id == MASTER) printf(" in %f seconds, %li multiplies, maxj=%li\n", (float)(clock()-t1)/(float) CLOCKS_PER_SEC,J,maxj); #endif if(ierr!=0) { fprintf(stderr, "PLANDR2 returning error = %li\n", ierr); goto error; } } #if PRINTIT /* Print eigenvalues */ if (my_id == MASTER) { printf(" "); for(j=0, i=3; j= COU) { printf("\n Plandr has called OP"); printf("\n Doing MPI rank and size stuff"); } #endif MPI_Comm_rank(*our_comm_world, &my_id); MPI_Comm_size(*our_comm_world, &comm_size); #if PDEBUGGER if(plandrcounter >= COU) printf("\nmyid=%d call number=%d\n", my_id, plandrcounter); printf("els.n=%i %p %p\n",els.n,given_vector[0],given_vector, tempsum); #endif for(counter=0; counter< els.n; counter++) { given_vector[counter] = 0.0; tempsum[counter] = 0.0; } /* CONCATENATES EACH PROCESSOR'S SMALL GIVEN INTO LARGE GIVEN FOR EVERYBODY'S USE */ MPI_Allgather(&int_n, 1, MPI_INT, recvsendarray, 1, MPI_INT, *our_comm_world); #if PDEBUGGER if(plandrcounter >= COU) printf("\n%d %d Leaving first MPI routine Allgather", my_id, plandrcounter); #endif displs[0] = 0; for(i=1;i< comm_size; i++) displs[i] = displs[i-1] + recvsendarray[i-1]; #if PDEBUGGER && 0 if(plandrcounter >= COU) printf("\n%d %d Calling MPI routine Allgatherv", my_id, plandrcounter); #endif #if PDEBUGGER MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN); #endif counter = MPI_Allgatherv(Q, *n, MPI_DOUBLE, given_vector, recvsendarray, displs, MPI_DOUBLE, *our_comm_world); /* END CONCATENATION PROCESS */ #if PDEBUGGER if(plandrcounter >= COU) printf("\n%d %d Returning from MPI routine Allgatherv", my_id, plandrcounter); #endif for (initcount = 0; initcount < *n; initcount++) R[initcount] = 0; #if PDEBUGGER if(plandrcounter >= COU) printf("\n%d %d initcount initialized", my_id, plandrcounter); #endif for(counti = 0; counti < els.n; ++counti) { countk = els.j[counti]; PointI = els.j[counti + 1]; /* see if this row is not empty and if it has a diagonal */ /* Both conditions are needed for the last element of the matrix */ if (countk= COU) printf("\n%d %d Matrix vector multiply finished successfully\n\n", my_id, plandrcounter); #endif MPI_Reduce_scatter(tempsum, R, recvsendarray, MPI_DOUBLE, MPI_SUM, *our_comm_world); MPI_Barrier(*our_comm_world); #if PDEBUGGER if(plandrcounter >= COU) printf("\n%d %d Returning to plandr2\n", my_id, plandrcounter); #endif return SUBRETURN; } /************************************************************************/ extern fsub OPM(integer *n, doublereal *x, doublereal *y, integer *our_comm_world) { integer count; for(count = 0; count < *n; count++) y[count] = x[count]; return SUBRETURN; } void FindChunk(integer *Size, int PeID, int Comm_Size, MPI_Comm Our_Comm) { int tempsize; *Size = (els.n * (PeID + 1))/Comm_Size - (els.n * PeID)/Comm_Size; MPI_Reduce(Size, &tempsize, 1, MPI_INT, MPI_SUM, 0, Our_Comm); if(PeID == 0 && els.n != tempsize) fprintf(stderr,"The tempsizes don't add up! tempsize=%i n=%i\n", tempsize,els.n); } #endif