#include #include #include #include #include #include "hamilton.h" /* elslist is declared here */ #include "interact.h" #include "row.h" #ifdef USE_MPI #include "usempi.h" #endif /* The subroutines in this file calculate the Hamiltonian matrix. More precisely, they calculate the operator 2 P^+ P^- or the residual v^+ P^- in the case of the heavy source. */ /* print timing information and size of basis. */ #define PRINTIT 0 /* On machines like the CRAY and the IBM SP2, incrementally increasing the size of an array (with realloc) after subsequent arrays have been allocated causes either memory holes (in the CRAY) of performance loss as memory is moved around (IBM SP2). This is an issue for storing nonzero elements of the Hamiltonian. */ #ifndef NONZERO_CHUNK #define NONZERO_CHUNK 20*1024*1024 /* minimum increase in matrix elements */ #endif /* hamels(basis,couplings) returns the nonzero matrix elements of the Hamiltonian (upper triangle) for a given set of couplings and basis. The nonzero matrix elements are returned. This include code that checks only nonzero matrix elements. */ void hamels(elslist *els, full_basis *x, element *couplings){ int i,j,npi,npj,k,jrest,ndivroot; latype p; int flip; size_t nonzero=0,temp_length; element y1,y2,yt,kfactor,*divroot=NULL; partype c[NPMAX],gc[ORDER_GROUP][NPMAX], *ipart, *jpart, *restpart; const latlist *subg; cbase rest; int (*whichflip)(partype *out, partype *in, enum state_type type, int np); #if PRINTIT clock_t t1=clock(); #endif #ifdef USE_MPI int nproc,myrank; MPI_Comm_size(mpi_group,&nproc); MPI_Comm_rank(mpi_group,&myrank); #endif if(x->type != TYPE_GLUE){ fprintf(stderr,__FILE__": wrong type for hamels!\n"); goto error; } subg=latgroups+subgroup[x->multi]; els->n=x->length; /* set up pointer for real matrix */ if(els->r==NULL){ els->r= (element *) els->c; els->c = NULL; } temp_length=sizeof(*els->j)*(els->n+1); /* memory needed for els->j */ if(temp_length>els->j_length) if(NULL==(els->j=realloc(els->j,els->j_length=temp_length))){ fprintf(stderr,__FILE__ ":%i Can't allocate for els->j, els->n=" SIZE_T_PRINT "\n",__LINE__ ,els->n); goto memerror; } if(NULL==(rest.s=malloc((rest.length=els->n*x->nptrunc)*sizeof(partype)))){ fprintf(stderr,__FILE__":%i Problem allocating for rest, " "rest.length=" SIZE_T_PRINT "\n",__LINE__,rest.length); goto memerror; } /* sign for flipping reversed states */ if(x->charge){ whichflip=charge_conjugation; flip=x->charge; } else if(x->o){ whichflip=orientation; flip=x->o; } else { whichflip=NULL; flip=0; } /* Allocate and initialize the vector used to supply the norm */ kfactor=(element) x->kt; /* a look-up table for kfactor/sqrt(integers) */ ndivroot=(x->type==TYPE_GLUE?x->nptrunc:1); ndivroot*=2*subg->length; ndivroot=1+ndivroot*ndivroot; if(NULL==(divroot=malloc(ndivroot*sizeof(element)))){ fprintf(stderr,__FILE__":%i Problem allocating for divroot, " "ndivroot=%i\n",__LINE__,ndivroot); goto memerror; } for(i=1;is; ilength; i++, ipart+=x->nptrunc){ npi=x->np[i]; els->j[i]=nonzero; /* els->j is filled even if we skip row */ #ifdef USE_MPI /* If this is not in range, go to the next row */ if(i%nproc != myrank)continue; #endif /* Calculate all lattice transformations of the left state */ for(p=0; plength; p++) /* no phase because there are no quarks */ statelat(gc[p],ipart,x->type,npi,subg->e[p]); /* find states with nonzero product with i */ #if 1 /* find states with possible nonzero product with ipart (FAST) */ row_glue(&rest,x,ipart,npi); #else /* all states in basis after i; should give same answer (SLOW)*/ rest.length=x->length-i; memcpy(rest.s,ipart,rest.length*x->nptrunc*sizeof(partype)); #endif /* See if there is enough memory to store next row */ temp_length=sizeof(*els->r)*NONZERO_CHUNK* ((nonzero+rest.length-1)/(NONZERO_CHUNK)+1); if(temp_length>els->rc_length){ if(NULL==(els->r=realloc(els->r,els->rc_length=temp_length))){ fprintf(stderr,__FILE__":%i els->r memory\n",__LINE__); goto memerror; } } temp_length=sizeof(*els->i)*NONZERO_CHUNK* ((nonzero+rest.length-1)/(NONZERO_CHUNK)+1); if(temp_length>els->i_length){ if(NULL==(els->i=realloc(els->i,els->i_length=temp_length))){ fprintf(stderr,__FILE__":%i els->i memory\n",__LINE__); goto memerror; } } /* loop through states in rest */ for(j=i, jrest=0, jpart=ipart, restpart=rest.s; jrestnptrunc, jrest++, restpart+=x->nptrunc){ #if 1 /* binary divide-and-conquer search findnext (FAST)*/ k=findnext(jpart,restpart,x->length-j,x->nptrunc); if(k==x->length-j){ fprintf(stderr,"i=%i j=%i\n",i,j); goto error; } j+=k; jpart+=k*x->nptrunc; #else /* go through all states; it should give same result (SLOW) */ while(memcmp(restpart,jpart,x->np[j]*sizeof(partype))!=0){ j++; jpart+=x->nptrunc; } #endif npj=x->np[j]; yt=0.0; /* make either orientation or charge conjugation of right state */ /* no phase, because there are no quarks */ if(flip)whichflip(c,jpart,x->type,npj); for(p=0; plength; p++){ interact_glueball(&y1,gc[p],npi,jpart,npj,couplings); if(flip)interact_glueball(&y2,gc[p],npi,c,npj,couplings); yt += (flip?y1+flip*y2:y1)*multiplet[x->multi][p]; } /* if zero, go to next state */ if(fabs(yt)<=ORDER_GROUP*2*ELEMENT_EPSILON)continue; /*This is the 'c' convention with zero for the first matrix element */ els->r[nonzero] = yt; els->i[nonzero] = j; #if 0 /* Debugging: the following should give identical results */ els->r[nonzero] *= kfactor/sqrt((element) (x->inner[i]*x->inner[j])); #else els->r[nonzero] *= divroot[x->inner[i]*x->inner[j]]; #endif nonzero++; } } els->j[els->n]=nonzero; #if PRINTIT printf("Total of %f CPU seconds to calculate %i matrix elements\n", (float) (clock()-t1)/(float) CLOCKS_PER_SEC,nonzero); #endif goto end; memerror: fprintf(stderr,__FILE__": Ran out of memory in hamels(), nonzero=" SIZE_T_PRINT "\n",nonzero); error: free(els->j); els->j=NULL; /* This is an error condition */ end: free(divroot); free(rest.s); return; } /**************************************************************** hamelsc(elslist,basis,couplings) returns the nonzero matrix elements of the Hamiltonian (upper triangle) for the case of a complex Hermitian matrix for a given set of couplings and basis. The nonzero matrix elements are returned. */ void hamelsc(elslist *els, full_basis *x, element *couplings){ int i,j,npi,npj,k,jrest,ndivroot,winding=iswinding(x->ht); latype p; int flip; size_t nonzero=0; /* counter for number of nonzero matrix elements */ int phase1[ORDER_GROUP],phase2=0; size_t temp_length; complex_element y1,y2,yt; element kfactor,*divroot=NULL,temp; partype c[NPMAX],gc[ORDER_GROUP][NPMAX], *ipart, *jpart, *restpart; const latlist *subg; cbase rest; int (*whichflip)(partype *out, partype *in, enum state_type type, int np); void (*whichinteract)(complex_element *total, partype *left, int npleft, partype *right, int npright, element *couplings)=NULL; void (*row_rest)(cbase *y, full_basis *bb, partype *x, int npx); #if PRINTIT clock_t t1=clock(); #endif #ifdef USE_MPI int nproc,myrank; MPI_Comm_size(mpi_group,&nproc); MPI_Comm_rank(mpi_group,&myrank); #endif subg=latgroups+subgroup[x->multi]; /* check for nonzero winding number */ if((x->type==TYPE_SOURCE)){ whichinteract=interact_source; row_rest=row_source; }else if(x->type==TYPE_MESON){ whichinteract=interact_meson; row_rest=row_meson; }else if(x->type==TYPE_HEAVY_LIGHT){ whichinteract=interact_heavy_light; row_rest=row_heavy_light; }else if(x->type==TYPE_GLUE){ if((PHASE_CONVENTION==1 || PHASE_CONVENTION==0) && !winding) whichinteract=interact_glueballc; else if(PHASE_CONVENTION==2) whichinteract=interact_glueballcu; else if(PHASE_CONVENTION==3 || (PHASE_CONVENTION==0 && winding)) whichinteract=interact_glueballcv; row_rest=row_glue; } else { fprintf(stderr,__FILE__":%i Can't find right interaction in hamelsc\n", __LINE__); goto error; } els->n=x->length; /* set up pointer for complex matrix */ if(els->c==NULL){ els->c= (complex_element *) els->r; els->r = NULL; } temp_length=sizeof(*els->j)*(els->n+1); /* memory needed for els->j */ if(temp_length>els->j_length) if(NULL==(els->j=realloc(els->j,els->j_length=temp_length))){ fprintf(stderr,__FILE__":%i Problem allocating for els->j, els->n=" SIZE_T_PRINT "\n",__LINE__,els->n); goto memerror; } if(NULL==(rest.s=malloc((rest.length=els->n*x->nptrunc)*sizeof(partype)))){ fprintf(stderr,__FILE__":%i Problem allocating for rest, rest.length=" SIZE_T_PRINT "\n",__LINE__,rest.length); goto memerror; } /* sign for flipping reversed states */ if(x->charge){ whichflip=charge_conjugation; flip=x->charge; } else if(x->o){ whichflip=orientation; flip=x->o; } else { whichflip=NULL; flip=0; } /* Allocate and initialize the vector used to supply the norm and overall factor of 2 P^+ */ /* a look-up table for kfactor/sqrt(integers) */ ndivroot=(x->type==TYPE_GLUE?x->nptrunc:1); ndivroot*=2*subg->length; ndivroot=1+ndivroot*ndivroot; if(NULL==(divroot=malloc(ndivroot*sizeof(element)))){ fprintf(stderr,__FILE__":%i Problem allocating for divroot, " "ndivroot=%i\n",__LINE__,ndivroot); goto memerror; } switch(x->type){ case TYPE_SOURCE: case TYPE_HEAVY_LIGHT: kfactor=1.0; break; case TYPE_GLUE: case TYPE_MESON: kfactor=(element) x->kt; break; default: kfactor=0.0; fprintf(stderr,__FILE__":%i bad type\n",__LINE__); } for(i=1;is; ilength; i++, ipart+=x->nptrunc){ npi=x->np[i]; els->j[i]=nonzero; /* els->j is filled even if we skip row */ #ifdef USE_MPI /* If this is not in range, go to the next row */ if(i%nproc != myrank)continue; #endif /* Calculate all lattice transformations of the left state */ for(p=0; plength; p++) /* Complex conjugate because this is a "bra" */ phase1[p]=-statelat(gc[p],ipart,x->type,npi,subg->e[p]); #if 1 /* calculate states with possible nonzero product with ipart (FAST) */ row_rest(&rest,x,ipart,npi); #else /* all states in basis after i; should give same answer (SLOW) */ rest.length=x->length-i; memcpy(rest.s,ipart,rest.length*x->nptrunc*sizeof(partype)); #endif /* See if there is enough memory to store next row */ temp_length=sizeof(*els->c)*NONZERO_CHUNK* ((nonzero+rest.length-1)/(NONZERO_CHUNK)+1); if(temp_length>els->rc_length){ if(NULL==(els->c=realloc(els->c,els->rc_length=temp_length))){ fprintf(stderr,__FILE__":%i els->c memory\n",__LINE__); goto memerror; } } temp_length=sizeof(*els->i)*NONZERO_CHUNK* ((nonzero+rest.length-1)/(NONZERO_CHUNK)+1); if(temp_length>els->i_length){ if(NULL==(els->i=realloc(els->i,els->i_length=temp_length))){ fprintf(stderr,__FILE__":%i els->i memory\n",__LINE__); goto memerror; } } /* loop through states in rest */ for(j=i, jrest=0, jpart=ipart, restpart=rest.s; jrestnptrunc, jrest++, restpart+=x->nptrunc){ /* see if j state matches the state in "rest" */ #if 1 /* binary divide-and-conquer search findnext (FAST) */ k=findnext(jpart,restpart,x->length-j,x->nptrunc); if(k==x->length-j)goto error; j+=k; jpart+=k*x->nptrunc; #else /* go through all states; it should give same result (SLOW) */ while(memcmp(restpart,jpart,x->np[j]*sizeof(partype))!=0){ j++; jpart+=x->nptrunc; } #endif npj=x->np[j]; yt.re=0.0; yt.im=0.0; /* make either orientation or charge conjugation of right state */ if(flip)phase2=whichflip(c,jpart,x->type,npj); for(p=0; plength; p++){ whichinteract(&y1,gc[p],npi,jpart,npj,couplings); phasemultiply(&y1,phase1[p]); if(flip){ whichinteract(&y2,gc[p],npi,c,npj,couplings); phasemultiply(&y2,phase2+phase1[p]); } yt.re += (flip?y1.re+flip*y2.re:y1.re)*multiplet[x->multi][p]; yt.im += (flip?y1.im+flip*y2.im:y1.im)*multiplet[x->multi][p]; } /* if zero, go to next state */ if(fabs(yt.re)<=ORDER_GROUP*2*ELEMENT_EPSILON && fabs(yt.im)<=ORDER_GROUP*2*ELEMENT_EPSILON)continue; /* Complex diagonal disaster */ if(i==j && fabs(yt.im)>ORDER_GROUP*2*ELEMENT_EPSILON){ fprintf(stderr,__FILE__":%i Complex diagonal element! i=%i\n", __LINE__,i); fprintf(stderr," yt = %g%+g*I\n",yt.re,yt.im); #ifdef USE_MPI fprintf(stderr," nproc=%i myrank=%i\n",nproc,myrank); #endif goto error; } #if 0 /* Debugging: the following should give identical results */ temp = kfactor/sqrt((element) (x->inner[i]*x->inner[j])); #else temp = divroot[x->inner[i]*x->inner[j]]; #endif /*This is the 'c' convention with zero for the first matrix element */ els->c[nonzero].re = yt.re*temp; els->c[nonzero].im = yt.im*temp; els->i[nonzero] = j; nonzero++; } } els->j[els->n]=nonzero; #if PRINTIT printf("Total of %f CPU seconds to calculate %i matrix elements\n", (float) (clock()-t1)/(float) CLOCKS_PER_SEC,nonzero); #endif goto end; memerror: fprintf(stderr,__FILE__": Ran out of memory in hamelsc(), " "nonzero=" SIZE_T_PRINT "\n",nonzero); error: free(els->j); els->j=NULL; /* This is an error condition */ end: free(divroot); free(rest.s); return; } /* diagnostic function to print some nonzero matrix elements... j=0 prints all elements */ void print_els(elslist *els, int j){ int i; size_t length=els->j[els->n]; if(j==0 || 2*j>=length) printf("Printing all nonzero matrix elements:\n list={"); else printf("Printing some nonzero matrix elements:\n {"); for(i=0; in-1,els->j[els->n-1]); else if(j!=0 && i>=j && i<=length-j)continue; if(els->r != NULL) printf("{%i,%.12g,0,",i,els->r[i]); else printf("{%i,%.12g%+.12g*I,",i,els->c[i].re,els->c[i].im); printf(SIZE_T_PRINT "}",els->i[i]); if(in; i++){ if(j!=0 && i==j)printf("..."); if(j!=0 && i>=j)continue; printf(SIZE_T_PRINT,els->j[i]); if(in){ printf(","); if(i%11==10)printf("\n "); } } printf("};\n"); } /* free memory used by els */ void free_els(elslist *els){ free(els->r); free(els->c); free(els->i); free(els->j); els->r=NULL; els->c=NULL; els->i=NULL; els->j=NULL; els->rc_length=0; els->i_length=0; els->j_length=0; }