#include <stdio.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#include <math.h>
#include "include.h"
#include "run.h"
#if defined(USE_MPI)
#include "usempi.h"
#endif

/** This file runs the Lanczos algorithm for finding the lowest
    eigenvalues of the Hamiltonian with extrapolation for nonzero P_1.
    The results are then analyzed for consistent dispersion relations,
    and a best fit is found based on this criterion.

    This version also demands degeneracy in the lowest parity doublet.
    It also uses the string tension to determine the lattice spacing.
**/

#if HINDEX==1
#define PRINTIT 1 /*Print timing and convergence information  */
                  /*for each iteration of fcn */

#define FTEPER 0 /* Teper's states to fit, each sector*/
                 /* This can be 0 to NSTILLVALS (2 is a good nonzero choice) */
#define FCSQUARED 1*TOTALSTATES /* Number of c^2 values to use in fit. */
                 /* This can be 0, 1 or TOTALSTATES */
#define FPARITY 2 /* Number of parity doublets to fit. */
                  /* This can be 0, 2. */
#define FSCALE 1 /*Whether to include consistantcy of the */
                 /*longitudinal potential with the overall scale.
		   The potential itself is calculated anyway.
		   This can be 0 or 1. */
#define FLONG MAX_LONG  /*Number of longitudinal heavy potential */
                       /*points to fit */
                       /*This can be 0 to MAX_LONG. */
#define NJPC 2 /*Number of JPC sectors for nonzero P_perp */
#define NP1 1  /*Number of values of nonzero transverse momentum */
#define TOTALSTATES 8 /*Total number of states in all NJPC sectors */
                      /*This is not so easily changed... */
#define NSTILL 4 /*Number of sectors for zero transverse momentum spectra
                    This must be 4 to cover all multiplets. */ 
#define NSTILLVALS 4  /*Number of states calculated in each sector*/
                      /*This must be large enough to obtain all
                        nonzero momenta states. */
#define MAXTH 10 /* max number of terms in fit functions */
#define NLONG 3 /*Data points used to estimate longitudinal potential */
#define NWIND 3 /* Data points used to estimate winding potential */
#define MAX_LONG 3 /* max number of criteria for roundness of long. pot. */
              

#if 1  /* newer smaller test basis */
       /* one step takes about 47 CPU seconds on the Pentium Pro */
       /* one step takes about 80 CPU seconds on the Hewlett Packard */
       /* NOTE(review): this branch declares lkmax as a 1-D array and
          declares no lkmax0, whereas MAIN prints lkmax0[j] and
          lkmax[i][j] and fcn passes lkmax0 and lkmax[i].  The
          "August 2000" branch below declares both (lkmax0[LNS] and
          lkmax[MAX_LONG][LNS]).  Confirm that this branch still builds
          before enabling it. */
/* Spectrum bases: MAIN prints these as (K,p) = (kt/2, np). */
#define NS 4
  const static int kt[NS]={10,10,14,18}, np[NS]={6,8,6,6};
/* Winding-potential bases: MAIN prints these as (n,K,p) = (wht, wkt/2, wnp),
   with one row per winding data point. */
#define WNS 4
  int wkt[NWIND][WNS]={{10,10,14,18},{9,9,11,13},{10,10,12,14}}, 
    wnp[NWIND][WNS]={{4,6,4,4},{5,7,5,5},{6,8,6,6}},
    wht[NWIND][HINDEX]={{2},{3},{4}};
/* Longitudinal bases: lkt0/lnp0 are used for the heavy potential
   (string tension), lkt/lnp for each of the roundness criteria. */
#define LNS 5
  static int lkt0[LNS]={-14,-14,-14,-34,-34},
      lkt[MAX_LONG][LNS]={{-7,-7,-7,-21,-21},{-7,-7,-7,-21,-21},
			 {-7,-7,-7,-21,-21}}, 
	lnp0[LNS]={2,4,2,2,2},
	lnp[MAX_LONG][LNS]={{3,5,3,3,3},{3,5,3,3,3},{3,5,3,3,3}};
/* llong0: the L values used for the heavy potential;
   llong: the L value of each roundness criterion. */
  element lkmax[LNS]={3.0,3.0,4.0,3.0,4.0},llong0[NLONG]={3.0,4.0,6.0},
	llong[MAX_LONG]={0.0,2.5,5.0};
#define PNS 6 /* number of sectors in p extrapolation */
  static int pnp1[PNS]={1,1,3,3,5,5}, pkt1[PNS]={19,29,19,29,19,21},
    pht1[HINDEX]={1};

#elif 0  /* August 2000 Larger basis */
         /* one step takes about 12.5 CPU minutes (geneva cluster) */
         /* Currently disabled.  This branch declares both lkmax0[LNS]
            and lkmax[MAX_LONG][LNS], the layout that MAIN and fcn
            index. */
/* Spectrum bases: MAIN prints these as (K,p) = (kt/2, np). */
#define NS 4
  const static int kt[NS]={20,20,26,32}, np[NS]={6,8,6,6};
/* Winding-potential bases, one row per winding data point. */
#define WNS 4
  static int wkt[NWIND][WNS]={{20,20,24,34},{21,21,25,35},
			      {20,20,24,34}}, 
    wnp[NWIND][WNS]={{4,6,4,4},{5,7,5,5},{6,8,6,6}},
    wht[NWIND][HINDEX]={{2},{3},{4}};
/* Longitudinal bases: the *0 arrays are for the heavy potential, the
   two-dimensional arrays hold one row per roundness criterion. */
#define LNS 7
  static int lkt0[LNS]={-40,-40,-40,-50,-50,-70,-70}, 
      lnp0[LNS]={2,4,2,2,2,2,2},
      lkt[MAX_LONG][LNS]={{-21,-21,-21,-35,-35,-69,-69},
			 {-21,-21,-21,-35,-35,-69,-69},
			 {-21,-21,-21,-35,-35,-69,-69}}, 
      lnp[MAX_LONG][LNS]={{3,5,3,3,3,3,3},{3,5,3,3,3,3,3},{3,5,3,3,3,3,3}};
  element lkmax0[LNS]={4.0,4.0,3.5,3.75,4.25,4.0,4.5},
    lkmax[MAX_LONG][LNS]={{4.0,4.0,3.5,4.0,5.0,5.0,6.0},
			  {4.0,4.0,3.5,4.0,5.0,5.0,6.0},
			  {4.0,4.0,3.5,4.0,5.0,5.0,6.0}},
    llong0[NLONG]={3.0,4.0,6.0},llong[MAX_LONG]={0.0,2.5,5.0};
#define PNS 7 /* number of sectors in p extrapolation */
  static int pnp1[PNS]={3,3,3,5,5,7,7},pkt1[PNS]={21,27,39,21,27,21,23},
    pht1[HINDEX]={1};

#elif 0  /* 1999 Larger basis */
/* one step takes about 21 CPU minutes */
/* one step takes about 34 CPU minutes on the Hewlett Packard */
/* Note there is an error in the winding extrapolation */
/* Currently disabled.
   NOTE(review): this branch declares lkmax0 but no lkmax, while MAIN
   and fcn also index lkmax[i][j]; it would need a lkmax table before
   it could be re-enabled. */
/* Spectrum bases: MAIN prints these as (K,p) = (kt/2, np). */
#define NS 6
const static int kt[NS]={18,18,20,20,26,32}, np[NS]={6,8,6,8,6,6};
/* Winding-potential bases, one row per winding data point. */
#define WNS 6
static int wkt[NWIND][WNS]={{20,20,20,20,24,28},{19,19,21,21,23,27},
			    {18,18,20,20,22,26}},
  wnp[NWIND][WNS]={{4,6,4,6,4,4},{5,7,5,7,5,5},{6,8,6,8,6,6}},
    wht[NWIND][HINDEX]={{2},{3},{4}};
/* Longitudinal bases: the *0 arrays are for the heavy potential, the
   two-dimensional arrays hold one row per roundness criterion. */
#define LNS 7
static int lkt0[LNS]={-32,-32,-32,-44,-44,-60,-60},
  lkt[MAX_LONG][LNS]={{-19,-19,-19,-33,-33,-49,-49},
		      {-19,-19,-19,-33,-33,-49,-49},
		      {-19,-19,-19,-33,-33,-49,-49}},
    lnp0[LNS]={2,4,2,2,2,2,2},
      lnp[MAX_LONG][LNS]={{3,5,3,3,3,3,3},{3,5,3,3,3,3,3},{3,5,3,3,3,3,3}};
element lkmax0[LNS]={3.0,3.0,4.5,3.0,4.5,3.0,4.5},
  llong0[NLONG]={3.0,4.0,6.0},llong[MAX_LONG]={0.0,2.5,5.0};
#define PNS 7 /* number of sectors in p extrapolation */
static int pnp1[PNS]={3,3,3,5,5,7,7},pkt1[PNS]={21,27,39,21,27,21,23},
  pht1[HINDEX]={1};


#elif 0 /* Truncations used in paper */
/* Currently disabled.
   NOTE(review): this branch appears to predate the present variable
   layout.  It redefines NWIND (already defined above as 3), declares
   lkt and lnp as 1-D arrays, and declares none of lkt0, lnp0, lkmax0,
   lkmax, llong0 or llong, all of which MAIN references.  The names
   longlist and kmax are not referenced in MAIN. */
#define NS 7
  const static int kt[NS]={20,20,20,22,24,26,28}, np[NS]={4,6,8,6,6,6,6};
#define NWIND 4
#define WNS 1
  static int wkt[NWIND][WNS]={{22},{21},{22},{21}}, 
    wnp[NWIND][WNS]={{6},{7},{8},{9}},
    wht[NWIND][HINDEX]={{2},{3},{4},{5}};
#define LNS 1
  static int lkt[LNS]={-30}, 
    lnp[LNS]={4};
#define PNS 5 /* number of sectors in p extrapolation */
  static int pnp1[PNS]={3,3,5,5,7}, pkt1[PNS]={21,19,19,21,19},
      pht1[HINDEX]={1};
  static element longlist[NLONG]={3.0,4.0,5.0},kmax[2]={0.0,4.0};

#elif 0 /* older test basis */
/* Currently disabled.
   NOTE(review): this branch uses older names (lkt1, lnp1, kmaxlist,
   longlist) in place of the lkt, lnp, lkmax0, lkmax, llong0 and llong
   arrays that MAIN references, so it would need updating before it
   could be re-enabled. */
#define NS 4
  const static int kt[NS]={14,12,14,16}, np[NS]={4,6,6,4};
#define NWIND 3
#define WNS 3
  int wkt[NWIND][WNS]={{12,14,16},{9,11,13},{10,12,14}}, 
    wnp[NWIND][WNS]={{6,4,4},{7,5,5},{8,6,6}},
    wht[NWIND][HINDEX]={{2},{3},{4}};
#define LNS 4
  static int lkt0[LNS]={-14,-14,-18,-30}, lkt1[LNS]={-7,-7,-17,-21}, 
      lnp0[LNS]={2,4,2,2}, lnp1[LNS]={3,5,3,3};
  element kmaxlist[LNS]={3,3,3,3},longlist[NLONG]={3.0,4.0,5.0};
#define PNS 4 /* number of sectors in p extrapolation */
  static int pnp1[PNS]={1,3,5,7}, pkt1[PNS]={11,11,11,11},pht1[HINDEX]={1};
#endif

/*  Below are several choices for errors.
    The error for:  c^2 for each state, 
    roundness of heavy quark potential (difference in eigenvalues),
    scale determined by longitudinal error (fractional error), and
    parity doublets  (fractional error).

    Exactly one branch should be enabled.  Each branch defines:
      lerr0      relative scale error (printed by MAIN when FSCALE is set),
      lerr[]     error of each roundness criterion,
      c2err[]    c^2 error of each of the TOTALSTATES states,
      parityerr[] fractional error of each parity doublet.        */
#if 0 && MAX_LONG<4 /* try to get as many states correct as possible */
static const element lerr0=0.2, lerr[MAX_LONG]={0.2,0.2,0.2},
    c2err[TOTALSTATES]={0.5,2.0,0.5,0.5,0.5,0.5,0.5,0.5}, 
  parityerr[2]={2.0,0.5};
#elif 1 && MAX_LONG<4 /* new compromise between the above and below. */
                      /* Used August 2000 */
static const element lerr0=0.1, lerr[MAX_LONG]={0.1,0.1,0.1},
    c2err[TOTALSTATES]={0.1,2.0,2.0,0.5,0.25,0.25,1.0,1.0}, 
  parityerr[2]={1.0,0.1};
#elif 0 && MAX_LONG<4 /* compromise between the above and below. */
static const element lerr0=0.2, lerr[MAX_LONG]={0.1,0.1,0.1},
    c2err[TOTALSTATES]={0.25,2.0,0.5,2.0,0.25,0.5,0.5,2.0}, 
  parityerr[2]={2.0,0.5};
#elif 0 /* try to get lowest state correct in each charge sector. */
/* NOTE(review): lerr has only 2 entries here, but MAIN reads lerr[i]
   for every i<FLONG; enlarge it before enabling this branch with
   FLONG greater than 2. */
static const element lerr0=2.5, lerr[2]={3.0,0.3},
    c2err[TOTALSTATES]={0.15,2.0,2.0,2.0,0.15,2.0,2.0,2.0}, 
  parityerr[2]={2.0,2.0};
#endif

         /* Teper's lattice data with errors for the first
            four states in each charge conjugation sector: 
            Also included is a 5% error from our eigenvalues.
            See "jessica.nb" and hep-lat/9804008

            teper[sector][state] holds the values and tepererr the
            matching errors, with one row per NSTILL sector.
            fcn reads teper[0][0] to set the overall scale for
            scale methods 1 and 3, so these tables are needed even
            when FTEPER is 0.                                       */
#if FTEPER<=4 
element teper[NSTILL][4]=
            {{16.5242,38.1924,47.3344,63.8401},
	     {63.0436,88.1721,89.6809,92.9296},
	     {34.9281,58.2169,63.0436,80.2816},
	     {47.3344,74.3044,81.3604,100.}},
    tepererr[NSTILL][4]=
            {{0.939451,2.49569,2.8858,4.7485},
	     {5.58069,9.86634,22.4233,11.2252},
	     {3.43249,6.35237,5.58069,12.3203},
	     {2.8858,7.53134,6.77042,6.52993}};
#endif

/* Winding numbers for longitudinal string tension. lht0 must be
   zero since the lattice spacing is unknown.
   lht0 is used for the heavy-potential bases (ls0, lt0) and lht[i]
   for the bases of the i-th roundness criterion (ls[i], lt[i]).  */
#if MAX_LONG<4
static int lht0[HINDEX]={0},lht[MAX_LONG][HINDEX]={{1},{1},{1}};
#endif
/* Number of states used from each JPC sector; MAIN checks that
   these sum to TOTALSTATES. */
const int nvals[NJPC]={4,4};
/* Basis structures, filled in by initbasis() in MAIN:
     s, t        nonzero-P_perp spectrum bases (two per JPC sector),
     ws          winding-potential bases,
     ls0, lt0    heavy-potential bases,
     ls, lt      roundness bases,
     ps1         p-extrapolation bases,
     stillbases  ordinary (zero transverse momentum) spectrum bases. */
static sectors s[NJPC][NS],ws[NWIND][WNS],ls0[LNS],
   lt0[LNS],ls[FLONG][LNS],lt[FLONG][LNS],t[NJPC][NS],ps1[PNS],
		       stillbases[NSTILL][NS];
/* wpar: indices into par[] of the couplings being fit;
   scalemethod: the scale-setting method read from stdin in MAIN. */
static int wpar[NPARAMS],scalemethod;
/* Values of P_perp*a for which the spectrum is calculated. */
static doublereal p1a[NP1][HINDEX]={{0.25}};
/* Current values of all couplings. */
static element par[NPARAMS];
/* Number of terms in the winding and longitudinal fits; set by
   wweights() and lweights() in fcn. */
static integer wth,lth;
/* These are all the quantities returned by fcn */
static struct {doublereal pextrap; element lvalue[FLONG]; 
  element calclvalue[FLONG];
  element wfitting[MAXTH]; element lfitting[MAXTH]; element rescale;
  element spectrumvals[NP1+1][TOTALSTATES]; element c2[TOTALSTATES];
  element stillvals[NSTILL][NSTILLVALS];} best;

   
int MAIN(int argc, char **argv){
#define NOUT 100
           /*Dimensions of chi^2 fit*/
#define MM (NSTILL*FTEPER+FCSQUARED+FPARITY+FSCALE+FLONG)
  char out[NOUT],*pout;  /* Output file name length */
  integer mm=MM,npar=1,maxfev,mode=1,ifail=0,info,nfev,ipvt[NPARAMS];
/*
      epsfcn should be a little less than the Lanczos accuracy. (say 10 times)
      ftol should be a little more than epsfcn
      xtol is the desired precision of the coupling constants  
      factor controls the maximum initial step size.  If the
        chi^2 surface is not smooth, this must
        be decreased to maybe 1. 
        (recommended range:  0.1 to 100, default 100).

      epsfcn is the variable used to determine the step size 
      for the finite difference approximation.  Since the 
      extrapolations magnify any error in the eigenvalues,
      this should be comfortably larger than the errors in
      the lanczos routine.  However, it should also be smaller
      than xtol.                                             
*/
  doublereal xtol=1.0e-3,gtol=0.0,ftol=5.e-5,epsfcn=5.0e-5,factor=1.0;
  doublereal vpar[NPARAMS],fvec[MM],diag[NPARAMS],fjac[MM*NPARAMS];
  doublereal qtf[NPARAMS],wa1[NPARAMS],wa2[NPARAMS],wa3[NPARAMS],wa4[MM];
  clock_t t1;
  int i,j,k,l,o[NJPC]={1,1},o2[NJPC]={-1,-1},ht[HINDEX]={0},
		       multi[NJPC]={1,2},multi2[NJPC]={2,1};
#if NSTILL==4
  int multistill[NSTILL]={1,1,2,2},ostill[NSTILL]={1,-1,1,-1};
#endif
  element chi2;
  FILE *fp;

#ifdef USE_MPI
  int myrank;
#endif

#ifdef BABBAGE  /* Routine to initialize Fortran on Babbage */
  if(hf_fint((char*)NULL)) exit(1);
#endif

  /* initialize parallelization via MPI when applicable.
     in this case we use a simple master/slave setup to
     distribute the construction of the Hamiltonian and the
     diagonalization of the various bases.      */

#ifdef USE_MPI
  MPI_Init(&argc, &argv);     /* initialize MPI */
  initmpisectors();
  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
  if(myrank!=0){
    slave();
    MPI_Finalize();   /* cleanup MPI */
    return 0;
  }
#endif

  /* Choose scale method, Load in data files, and open output file */

  printf("Output file name:  ");
  fgets(out,NOUT,stdin);
  if((pout=strchr(out,'\n'))!=NULL)*pout='\0';
  printf("%s\n",out);
  if((fp = fopen(out,"w")) == NULL){
    fprintf(stderr,__FILE__":  Can't open file %s, exiting\n",out);
    goto cleanup;
  }

  printf("Input maximum number of iterations:  ");
  scanf("%li",&maxfev);

  printf("Choose method for determining the overall scale:\n"
         "    0 use longitudinal string tension\n"
         "    1 fit lowest state in spectrum to lattice value\n"
         "    2 demand minimum chi^2 for c^2 measurements\n"
	 "    3 find scale by demanding overall minimum chi^2.\n");
  scanf("%i",&scalemethod);  

  for(i=0, j=0; i<NJPC; i++)j+=nvals[i];
  if(j!=TOTALSTATES){
     fprintf(stderr,__FILE__":  Number of states don't match in Jessica\n");
    goto cleanup;
  }

  /*  Print out the results in the output file **/
  fprintf(fp,"********** Eigenvalues for the 2+1 transverse lattice **********\n");
#if NEWTAU
  fprintf(fp,"Couplings:  m^2, G^2 N, la_1, la_2, la_3, tau_1, tau_2          \n");
  fprintf(fp,"             0      1     2     3     4      5      6   (2-6 /a)\n");
#else
  fprintf(fp,"Couplings:  m^2, G^2 N, la_1, la_2, la_3, tau (2-5 divided by a)\n");
  fprintf(fp,"             0      1     2     3     4    5                    \n");
#endif
  fprintf(fp,"Use chi^2 fit with %i criteria, and tolerance %g.  \n",MM,xtol);
  if(scalemethod==0)
    fprintf(fp,"Overall scale from longitudinal string tension.\n");
  else if(scalemethod==1)
    fprintf(fp,"Overall scale from fitting lowest state to lattice value.\n");
  else if(scalemethod==2)
    fprintf(fp,"Overall scale determined from best fit for c^2.\n");
  else if(scalemethod==3)
    fprintf(fp,"Overall scale from minimizing chi^2.\n");
  else {
     fprintf(stderr,__FILE__":  Incorrect scale method = %i, exiting.\n",scalemethod);
    goto cleanup;
  }
#ifdef KSQUARED
  fprintf(fp,"1/K^2 term used in all extrapolations.\n");
#endif
#if FTEPER>0
  fprintf(fp,"Includes a fit to %i*%i eigenvalues from Teper.\n",NSTILLVALS,FTEPER);
#endif
#if FPARITY>0
  fprintf(fp,"%i parity doublets with fractional errors",FPARITY);
  for(i=0, k=4; i<FPARITY; k++, i++){
    if(k%6==0)fprintf(fp,"\n ");
    fprintf(fp," %g",parityerr[i]);
  }
  fprintf(fp,".\n");
#else
  fprintf(fp,"No parity doublets.\n");
#endif
  fprintf(fp,"Spectrum for P_perp a = (0) ");
  for(i=0; i<NP1; i++){
    fprintf(fp," (");
    for(j=0; j<HINDEX; j++)fprintf(fp," %g",p1a[i][j]);
    fprintf(fp,") ");
  }
  fprintf(fp,"\n using (# states, o, multiplet, c^2 error for each) =");
  for(i=0, k=0; i<NJPC; i++){
    fprintf(fp,"\n  (%i, %i & %i, %i & %i,",
	    nvals[i],o[i],o2[i],multi[i],multi2[i]);
    for(j=0; j<nvals[i]; k++, j++)
      if(k<FCSQUARED)fprintf(fp," %g",c2err[k]);
    fprintf(fp,")");
  }
  fprintf(fp,".\nSpectra extrapolated using (K,p) =");
  for(i=0, k=3; i<NS; k++, i++){
    if(k%5==0)fprintf(fp,"\n ");
    fprintf(fp," (%i/2,%i) ",kt[i],np[i]);
  }
  fprintf(fp,".\nWinding potential using (n,K,p) =");
  for(i=0, k=3; i<NWIND; i++)
    for(j=0; j<WNS; k++, j++){
      if(k%5==0)fprintf(fp,"\n ");
      fprintf(fp," (");
      for(l=0; l<HINDEX; l++)fprintf(fp," %i",wht[i][l]);
      fprintf(fp,",%i/2,%i) ",wkt[i][j],wnp[i][j]);
    }
  fprintf(fp,".\nHeavy potential determined using (n,K,p,K_max) =");
  for(j=0, k=3; j<LNS; k++, j++){
    if(k%4==0)fprintf(fp,"\n ");
    fprintf(fp," (");
    for(l=0; l<HINDEX; l++)fprintf(fp," %i",lht[0][l]);
    fprintf(fp,",%i/2,%i,%g) ",lkt0[j],lnp0[j],lkmax0[j]);
  }
  fprintf(fp,",\n L =");
  for(i=0; i<NLONG; i++)fprintf(fp," %g",llong0[i]);
  fprintf(fp," (all in G^2 N units)");
  if(FSCALE)fprintf(fp,"; relative scale error %g",lerr0);
  fprintf(fp,".\nRoundness determined using (n,K,p,K_max) =");
  for(i=0, k=3; i<FLONG; k=0, i++){
    for(j=0; j<LNS; k++, j++){
      if(k%4==0)fprintf(fp,"\n ");
      fprintf(fp," (");
      for(l=0; l<HINDEX; l++)fprintf(fp," %i",lht[i][l]);
      fprintf(fp,",%i/2,%i,%g) ",lkt[i][j],lnp[i][j],lkmax[i][j]);
    }
    fprintf(fp,"\n L=%g and error %g; ",llong[i],lerr[i]);
  }
  fprintf(fp,"all in G^2 N units.\n"
	     "p-extrapolation using n=(");
  for(l=0; l<HINDEX; l++)fprintf(fp," %i",pht1[l]);
  fprintf(fp,") and (K,p) =");
  for(j=0, k=5; j<PNS; k++, j++){
    fprintf(fp," (%i/2,%i) ",pkt1[j],pnp1[j]);
    if(k%6==0 && j+1<PNS)fprintf(fp,"\n ");
  }
  fprintf(fp,".\nResult format:\n"
      " Fit info, # steps, chi^2, p damping, and scale G^2 N/sigma.\n"
      " The %i couplings (G^2 N units) and which--if any--were fit.\n"
      " Winding potential and heavy source potential fits.\n"
      " Roundness with calculated and derived values (G^2 N units).\n"
      " The rescaled spectrum for each P_perp*a and c^2 values.\n"
#if NSTILL>0
      " Finally come the states for the ordinary spectra.\n"
#endif
      "\n",NPARAMS);
  fflush(fp);

  /* make basis structures */

  printf("Make %i spectrum bases,",2*NJPC*NS);
  t1=clock();
  for(j=0; j<NJPC; j++)
    for(i=0; i<NS; i++){
      initbasis(s[j]+i,ht,np[i],kt[i],1,o[j],multi[j]);
      initbasis(t[j]+i,ht,np[i],kt[i],1,o2[j],multi2[j]);
    }
  printf(" %i ordinary spectrum bases,",NSTILL);
  for(j=0; j<NSTILL; j++)
    for(i=0; i<NS; i++)
      initbasis(stillbases[j]+i,ht,np[i],kt[i],1,ostill[j],multistill[j]);
  printf(" %i winding bases,",NWIND*WNS);
  for(i=0; i<NWIND; i++)
    for(j=0; j<WNS; j++)
      initbasis(ws[i]+j,wht[i],wnp[i][j],wkt[i][j],1,1,0);
  printf(" %i long bases,",2*LNS);
  for(j=0; j<LNS; j++){
      initbasis(ls0+j,lht0,lnp0[j],lkt0[j],0,1,1);
      initbasis(lt0+j,lht0,lnp0[j],lkt0[j],0,-1,1);
  }
  for(i=0; i<FLONG; i++)
    for(j=0; j<LNS; j++){
      initbasis(ls[i]+j,lht[i],lnp[i][j],lkt[i][j],0,1,0);
      initbasis(lt[i]+j,lht[i],lnp[i][j],lkt[i][j],0,-1,0);
    }
  printf("\nand %i n=%i bases",PNS,*pht1);
  for(j=0; j<PNS; j++)
      initbasis(ps1+j,pht1,pnp1[j],pkt1[j],1,1,0);
  printf(" in %f CPU secs.\n",(float)(clock()-t1)/(float) CLOCKS_PER_SEC);

/******************  Start main loop of program **************/

  for(;;){
    printf("How many to fit (0 to %i, -1=stop)\n",NPARAMS-2);
    scanf("%li",&npar);
    if(npar<0)break;
#if 1==1 /* Input starting parameters */
    printf("Input %i starting parameters\n",NPARAMS);
    for(i=0; i<NPARAMS; i++)scanf("%lf",par+i);
#endif

    if(npar!=0){
      printf("Input which %li couplings (0=m^2, ...)\n",npar);
      for(i=0; i<npar; i++)scanf("%i",wpar+i);
      for(i=0; i<npar; i++)vpar[i]=par[wpar[i]];

/*    Least squares fitting routine    
      subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,
 *                  maxfev,epsfcn,diag,mode,factor,nprint,info,nfev,
 *                 fjac,ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4)  */
      LMDIF(FCN_TO_FORTRAN,&mm,&npar,vpar,fvec,&ftol,&xtol,&gtol,
            &maxfev,&epsfcn,diag,&mode,&factor,&ifail,&info,&nfev,
            fjac,&mm,ipvt,qtf,wa1,wa2,wa3,wa4);

      if(info<1||info>4) fprintf(stderr,__FILE__":  Error in lmdif info=%li\n",info);
      if(info==0)goto cleanup;
      /*Map best values into par[],*/
      for(i=0; i<npar; i++)par[wpar[i]]=vpar[i];
    } else {
      nfev=1; info=0; 
      fcn(&mm,&npar,vpar,fvec,&info);
    }
    if(info<0){
       fprintf(stderr,__FILE__":  Error in fcn info=%li\n",info);
      if(info<=-8)goto cleanup;
    }
    /* calculate chi^2 */
    chi2=fdot(fvec,fvec,mm);
    printf("%li steps chi2=%f with c_p=%f scale=%f\n",
	   nfev,chi2,best.pextrap,best.rescale);
    fprintf(fp,"%li %li " PDIGITS " " PDIGITS " " PDIGITS "\n",
	    info,npar?nfev:1,chi2,best.pextrap,best.rescale);
    printf("Final couplings: ");
    for(i=0; i<NPARAMS; i++){
      printf(" %f",par[i]); fprintf(fp,PDIGITS " ",par[i]);
    }
    if(npar>0)printf("\nwhile fitting");
    for(i=0; i<npar; i++){
      printf(" %i",wpar[i]); fprintf(fp," %i",wpar[i]);
    }
    printf("\nWinding: "); fprintf(fp,"\n");
    for(i=0; i<wth; i++){
      printf("%f ",best.wfitting[i]); fprintf(fp,"%f ",best.wfitting[i]);
    }
    printf("and long:");
    for(i=0; i<lth; i++){
      printf(" %f",best.lfitting[i]); fprintf(fp," %f",best.lfitting[i]);
    }
    fprintf(fp,"\n");
    printf("\nRoundness pairs:");
    for(i=0; i<FLONG; i++){
      printf("%f %f\n",best.lvalue[i],best.calclvalue[i]); 
      fprintf(fp,"%f %f  ",best.lvalue[i],best.calclvalue[i]);
    }
    fprintf(fp,"\n");
    printf("\n");
    for(i=0; i<TOTALSTATES; i++){
      for(j=0; j<NP1+1; j++){
	printf(" %f",best.rescale*best.spectrumvals[j][i]); 
	fprintf(fp," %f",best.rescale*best.spectrumvals[j][i]);
      }
      printf(" %f\n",best.c2[i]); fprintf(fp," %f\n",best.c2[i]);
    }
    for(i=0; i<NSTILL; i++){
      printf("o=% i multi=%i ",ostill[i],multistill[i]);
      for(j=0; j<NSTILLVALS; j++){
	printf("%f ",best.rescale*best.stillvals[i][j]); 
	fprintf(fp,"%f ",best.rescale*best.stillvals[i][j]);
      }
      fprintf(fp,"\n"); printf("\n");
    }
    fprintf(fp,"\n"); printf("\n");
    fflush(fp);
    fflush(stdout);
  }

cleanup:
  fclose(fp);
#if defined(USE_MPI)
  /************* Tell slaves to shut down. **************/
#if ONE_PROCESSOR_SPECTRUM
  MPI_Comm_size(MPI_COMM_WORLD,&myrank);
  for(i=1; i<myrank; i++)
    /* only the message tag is read by the slaves. */
    MPI_Send(&myrank,1,MPI_INT,i,0,MPI_COMM_WORLD);
#else
  MasterShutDown();
#endif
  MPI_Finalize();             /* cleanup MPI */
#else
  printf("Value of max_els is %i; value of max_base is %i\n",
	 max_els,max_base);
#endif
  return 0;
}


/*****************************************************************
******************************************************************


  The following routine calculates the fit criteria for
  a given set of fit parameters.  It returns the vector
  fvecc, whose squared norm gives the chi^2.

  The values that are calculated are meant to be reused from the
  previous iteration if the relevant couplings are unchanged
  (the change detection is currently disabled with "#if 0", so
  everything is recalculated on each call).
  This includes:

         pextrap, wfitting, lfitting, lvalue[], spectrumvals

  Information about the spectra is returned in the structure
  best.  The members of the structure best 
  are from the lowest chi^2 iteration of fcn (which is
  not necessarily the last iteration of fcn).

  On error, fcn sets *iflag to a negative integer.
  Values *iflag<=-8 are "fatal" errors.


********************************************************************
*******************************************************************/


extern fsub fcn(integer *m, integer *npar, doublereal *xp, 
	       doublereal *fvecc, integer *iflag){
  /* fitc is the index for the fit criterion */
  int i,k,j,l,fitc;
  int prepeat=1,lrepeat=1,srepeat=1;
  integer nwind=NWIND,nlong=NLONG,th[1];
  element *vals,temp,sum,rescale,calclvalue[FLONG],zz,chi2;
  doublereal c2[TOTALSTATES];
  static doublereal spectrumvals[NP1+1][TOTALSTATES],
    stillvals[NSTILL][NSTILLVALS];
  static element wfitting[MAXTH],lfitting[MAXTH],lvalue[FLONG],pextrap;
  static element oldpar[NPARAMS],bestpar[NPARAMS],bestchi2;
  clock_t tf;
#if NWIND>NLONG
  element weights[NWIND*MAXTH],holdvals[NWIND];
#else
  element weights[NLONG*MAXTH],holdvals[NLONG];
#endif
#if defined(USE_MPI) && ONE_PROCESSOR_SPECTRUM
#define SMAX 200  /* maximum total number of matrices to calculate */
  sectors *mpibase[SMAX][2];
  int job,njob,mpinvals[SMAX],whichvals[SMAX];
  element angle[SMAX][HINDEX+2];
  static element *mpivals=NULL;
#else
  int idummy[HINDEX+2];
  doublereal dummy[HINDEX+2];
#endif
  tf=clock();

  /* bestpar is used later to see if unfitted variables have changed*/
  for(i=0; i<*npar; i++)bestpar[wpar[i]]=par[wpar[i]]=xp[i];

/* then find what needs to be recalculated */

#if 0 /* I am not 100% sure this works right */
  /* then find what needs to be recalculated */
  for(i=0; i<4 && fabs(par[i]-oldpar[i])<=2*ELEMENT_EPSILON; i++);
  prepeat=i<4;  /*recalculate "p-extrapolation", nonzero winding */
  for(i=0; i<NPARAMS && (i==4 || fabs(par[i]-oldpar[i])<=2*ELEMENT_EPSILON); 
      i++);
  lrepeat=i<NPARAMS;  /*recalculate nonzero L  */
  for(i=0; i<5 && fabs(par[i]-oldpar[i])<2*ELEMENT_EPSILON; i++);
  srepeat=i<5;  /*recalculate "spectrum", zero winding */
#endif

#if PRINTIT
  printf("  Parameters: ");
  for(i=0; i<NPARAMS; i++)printf(" %f",par[i]);
  printf("\n");
#endif

/*
         If a parmeter is very large, exit with fvecc set
         to parameters.  
         
         Also, exit with error if lambda_3 < -1000
*/

  for(i=0; i<NPARAMS; i++)
    if(fabs(par[i])>1.0e4)break;
  if(i<NPARAMS || par[4]<-1.0e3*par[1]){
     fprintf(stderr,__FILE__":  The value of parameters too large, exiting\n");
    for(j=0; j<NPARAMS; j++)
      fvecc[j]=par[j];
    for(; j<*m; j++)
      fvecc[j]=0.0;
    return SUBRETURN;
  }

#if defined(USE_MPI) && ONE_PROCESSOR_SPECTRUM

  /* Some global default initialization */
  for(i=0; i<SMAX; i++){
    mpinvals[i]=1;
    for(j=0; j<HINDEX+2; j++)angle[i][j]=0.0;
  }
  job=0;

  /* Calculate the convergence in p-truncation */

  /* First, Check if the couplings are the same as before */
  if(prepeat){
    for(j=0; j<PNS; job++, j++)
      mpibase[job][0]=mpibase[job][1]=ps1+j;

  /*  Calculate the transverse string tension. */
    for(i=0; i<nwind; i++)
      for(j=0; j<WNS; j++, job++)
	mpibase[job][0]=mpibase[job][1]=ws[i]+j;
  }

  /*  Calculate the longitudinal string tension  */
  if(lrepeat){
    for(i=0; i<nlong; i++)
      for(j=0; j<LNS; job++, j++){
	angle[job][HINDEX]=lkmax0[j];
	angle[job][HINDEX+1]=llong0[i];
	mpibase[job][0]=ls0+j;
	mpibase[job][1]=lt0+j;
      }

    /*  Calculate potential for n=1, L nonzero  */
    for(i=0; i<FLONG; i++)
      for(j=0; j<LNS; job++, j++){
	angle[job][HINDEX]=lkmax[i][j];
	angle[job][HINDEX+1]=llong[i];
	mpibase[job][0]=ls[i]+j;
	mpibase[job][1]=lt[i]+j;
      }
  }

  /*  Calculate the ordinary extrapolated spectra. */
  if(srepeat){
    for(i=0; i<NSTILL; i++)
      for(j=0; j<NS; job++, j++){
	mpinvals[job]=NSTILLVALS;
	mpibase[job][0]=mpibase[job][1]=stillbases[i]+j;
      }

    /*  Calculate the extrapolated spectrum for nonzero P_perp  */
    for(l=0; l<NP1; l++)
      for(i=0; i<NJPC; i++)
	for(j=0; j<NS; job++, j++){
	  for(k=0; k<HINDEX; k++)angle[job][k]=p1a[l][k];
	  mpinvals[job]=nvals[i]+1;
	  mpibase[job][0]=s[i]+j;
	  mpibase[job][1]=t[i]+j;
      }
  }

  /************ now distribute the work to the various slaves **********/
  njob=job;
  if(njob>SMAX){
     fprintf(stderr,__FILE__":  SMAX is too small, exiting\n.");
    *iflag=-8;
    return SUBRETURN;
  }
  for(i=0, j=0; i<njob; i++){
    whichvals[i]=j;
    j+=mpinvals[i];
  }
  mpivals=realloc(mpivals,j*sizeof(element));
#if 0 /* debugging print */
  printf("starting subroutine master with %i jobs.\n",njob);
#endif
  if(master(mpibase,par,angle,mpinvals,whichvals,mpivals,njob)){
     fprintf(stderr,__FILE__":  master() is returning an error, exiting\n");
    *iflag=-9;
    return SUBRETURN;
  }
#if 0 /* debugging print */
  printf("finished subroutine master \n");
  for(i=0; i<10; i++)
    printf("element %i, val %f, nvals=%i which=%i\n",i,mpivals[i],
	   mpinvals[i],whichvals[i]);
#endif

  /******** take results and calculate extrapolations etc. **************/
  job=0;

  /* Calculate the convergence in p-truncation */
  if(prepeat){
    pextrap=pextrapolate1(PNS,ps1,mpivals+whichvals[job]);
    job+=PNS;

    /*  Calculate the transverse string tension.  The wht matrix
	must be reformatted into a form compatable with
	the FORTRAN routince WWEIGHTS.  The result agrees
	with the result in the paper for the truncations and
	couplings in the paper.    */
    wweights(weights,&wth,nwind,wht);
    PSEUDOINVERSE(weights,&wth,&nwind);
    for(j=0; j<wth; j++)wfitting[j]=0.0;
    for(i=0; i<nwind; i++){
      vals=extrapolate1(WNS,ws[i],mpivals+whichvals[job],th,1,&pextrap);
      job+=WNS;
      holdvals[i]=vals[0];
      for(j=0; j<wth; j++)
	wfitting[j] += vals[0]*weights[i+j*nwind];
    }
  }


  /*  Calculate the longitudinal string tension  */
  if(lrepeat){
    lweights(weights,&lth,nlong,llong0);
    PSEUDOINVERSE(weights,&lth,&nlong);
    for(i=0; i<lth; i++)lfitting[i]=0.0;
    for(i=0; i<nlong; i++){
      vals=lextrapolate1(LNS,ls0,mpivals+whichvals[job],th,1,&pextrap,lkmax0);
      job+=LNS;
      holdvals[i]=vals[0];
      for(j=0; j<lth; j++)
	lfitting[j]+=vals[0]*weights[i+j*nlong];
    }
    
    /*  Calculate potential for n=1, L nonzero  */
    for(i=0; i<FLONG; i++){
      vals=lextrapolate1(LNS,ls[i],mpivals+whichvals[job],
			 th,1,&pextrap,lkmax[i]);
      job+=LNS;
      lvalue[i]=vals[0];
    }
  }

  /*  Calculate the ordinary extrapolated spectra. */
  /* Sort the second and third excited state */
  if(srepeat){
    for(i=0; i<NSTILL; i++){
      vals=extrapolate1(NS,stillbases[i],mpivals+whichvals[job],
			th,NSTILLVALS,&pextrap);
      job+=NS;
      if(NSTILLVALS>=4 && vals[2]>vals[3]){
	temp=vals[2]; vals[2]=vals[3]; vals[3]=temp;
      }
      for(k=0; k<NSTILLVALS; k++)stillvals[i][k]=vals[k];
    }
    /*  Calculate the extrapolated spectrum for nonzero P_perp  */
    /*  Here we sort the states before adding to array.  */
    for(l=0; l<NP1; l++)
      for(i=0, j=0; i<NJPC; j+=nvals[i], i++){
	vals=extrapolate1(NS,s[i],mpivals+whichvals[job],
			  th,nvals[i]+1,&pextrap);
	job+=NS;
	/* sort and add states: we assume nvals[i]+1 eigenvalues */
	k=0;
	addterms(spectrumvals[l+1]+j,&k,nvals[i],vals,nvals[i]+1);
      }
  }

  if(job!=njob){
     fprintf(stderr,__FILE__":  The number of spectra calculated, njob=%i,\n"
	    "and used, job=%i, do not match, exiting.\n",njob,job);
    *iflag=-10;
    return SUBRETURN;
  }


#else  /****** calculate everything without using MPI **************/

  /* Calculate the convergence in p-truncation */
  if(prepeat){
    pextrap=pextrapolate(PNS,ps1,par);
    
    /*  Calculate the transverse string tension.  The wht matrix
	must be reformatted into a form compatable with
	the FORTRAN routince WWEIGHTS.  The result agrees
	with the result in the paper for the truncations and
	couplings in the paper.    */
    wweights(weights,&wth,nwind,wht);
    PSEUDOINVERSE(weights,&wth,&nwind);
    for(j=0; j<wth; j++)wfitting[j]=0.0;
    for(i=0; i<nwind; i++){
      vals=extrapolate(WNS,ws[i],ws[i],th,1,par,&pextrap,idummy,dummy);
      holdvals[i]=vals[0];
      for(j=0; j<wth; j++)
	wfitting[j] += vals[0]*weights[i+j*nwind];
    }
  }


  /*  Calculate the longitudinal string tension  */
  /* First, Check if the couplings are the same as before */
  if(lrepeat){
    lweights(weights,&lth,nlong,llong0);
    PSEUDOINVERSE(weights,&lth,&nlong);
    for(i=0; i<lth; i++)lfitting[i]=0.0;
    for(i=0; i<nlong; i++){
      vals=lextrapolate(LNS,ls0,lt0,th,1,par,&pextrap,lkmax[i],llong0[i]);
      holdvals[i]=vals[0];
      for(j=0; j<lth; j++)
	lfitting[j]+=vals[0]*weights[i+j*nlong];
    }

    /*  Calculate potential for n=1, L nonzero  */
    for(i=0; i<FLONG; i++){
      vals=lextrapolate(LNS,ls[i],lt[i],th,1,par,&pextrap,lkmax0,llong[i]);
      lvalue[i]=vals[0];
    }
  }

  /*  Calculate the ordinary extrapolated spectra. */
  /* Sort the second and third excited state */
  if(srepeat){
    for(i=0; i<NSTILL; i++){
      vals=extrapolate(NS,stillbases[i],stillbases[i],th,NSTILLVALS,par,
		       &pextrap,idummy,dummy);
      if(NSTILLVALS>=4 && vals[2]>vals[3]){
	temp=vals[2]; vals[2]=vals[3]; vals[3]=temp;
      }
      for(k=0; k<NSTILLVALS; k++)stillvals[i][k]=vals[k];
    }

    /*  Calculate the extrapolated spectrum for nonzero P_perp  */
    /*  Here we sort the states before adding to array.  */
    for(l=0; l<NP1; l++)
      for(i=0, j=0; i<NJPC; j+=nvals[i], i++){
	vals=extrapolate(NS,s[i],t[i],th,nvals[i]+1,
			 par,&pextrap,idummy,p1a[l]);
	/* sort and add states: we assume nvals[i]+1 eigenvalues */
	k=0;
	addterms(spectrumvals[l+1]+j,&k,nvals[i],vals,nvals[i]+1);
      }
  }

#endif /* use or not use MPI to calculate spectrum */

    /*  Put ordinary spectrum values into nonzero P_perp array*/
    /*  The states must be compatible with */
    /*  the ordering obtained when calculating nonzero p_perp */
    /*  Here we sort the states before adding.  */
  /* Sort the second and third excited state */
  if(srepeat){
    for(i=0, vals=spectrumvals[0]; i<NJPC; vals+=nvals[i], i++){
      if(nvals[i]>NSTILLVALS){
	 fprintf(stderr,__FILE__":  Insufficient number of states in ordinary\n"
		"spectrum for use in nonzero P_perp, exiting\n");
	*iflag=-11;
	return SUBRETURN;
      }
      for(j=0, k=0; j<NSTILL; j++){
	if((stillbases[j]->multi==s[i]->multi && stillbases[j]->o==s[i]->o)||
	   (stillbases[j]->multi==t[i]->multi && stillbases[j]->o==t[i]->o))
	  addterms(vals,&k,nvals[i],stillvals[j],NSTILLVALS);
      }
    }
  }
    
  /*  copy couplings into oldparams */
  for(i=0; i<NPARAMS; i++)oldpar[i]=par[i];
  
/*
     There are several possible methods for setting
     the overall scale of the Hamiltonian  
*/
  if(scalemethod==0)          
/*                Find scale by long string tension */
    rescale=1.0/lfitting[0];
  else if(scalemethod==1)    
/*                Find scale by looking only at lowest state */
    rescale=teper[0][0]/stillvals[0][0];
  else if(scalemethod==2){
/*         Find scale by minimizing chi^2 for c^2 measurement */
    sum=0.0; chi2=0.0;
    for(i=0; i<TOTALSTATES; i++){
      rescale=spectrumvals[1][i]-spectrumvals[0][i];
      sum += rescale/pow(c2err[i],2);
      chi2 += pow(rescale/c2err[i],2);
    }
    rescale=sum/chi2*fdot(p1a[0],p1a[0],HINDEX)/wfitting[0];
    if(rescale>0.0)
      rescale=sqrt(rescale);
    else {
      rescale=0.0;
      *iflag = -1;
    }
  } else if(scalemethod==3){
/*       Find scale by minimizing overall chi^2.
         This does not include the consistency of the L nonzero,
         n=1 datum because it is too complicated.
         The initial guess is from Teper's lowest state. */
    for(rescale=teper[0][0]/stillvals[0][0], j=0; ; j++){
      sum=0.0; zz=0.0;
      for(i=0; i<FCSQUARED; i++){
	c2[i]=wfitting[0]*pow(rescale,2)*
	  (spectrumvals[1][i]-spectrumvals[0][i])/fdot(p1a[0],p1a[0],HINDEX);
	temp=2.0*c2[i]/rescale;
	sum += temp*(c2[i]-1.0)/pow(c2err[i],2);
	zz +=  pow(temp/c2err[i],2);
      }
      for(i=0; i<NSTILL; i++)
	for(k=0; k<NSTILLVALS && k<FTEPER; k++){
	  sum += stillvals[i][k]*(stillvals[i][k]*rescale-teper[i][k])/
	    pow(tepererr[i][k],2);
	  zz += pow(stillvals[i][k]/tepererr[i][k],2);
	}
#if FSCALE
      sum += lfitting[0]*(rescale*lfitting[0]-1.0)/pow(lerr0,2);
      zz += pow(lfitting[0]/lerr0,2);
#endif
      temp=rescale;
      /* I include a damping parameter here to improve
	 numerical stability (which was a problem for 3+1) */
      rescale -= 0.25*sum/zz;
#if 0 /* debugging print */
      if(j<50)printf("rescale=%.16g\n",rescale);
#endif
      if(fabs(temp-rescale)<2*ELEMENT_EPSILON)break;
      if(j==1000){
	 fprintf(stderr,__FILE__":  Scale determination not converged\n");
	*iflag=-2;
	break;
      }
    }
  } else {
     fprintf(stderr,__FILE__":  in fcn, scale method=%i is wrong.\n",scalemethod);
    *iflag=-12;
    return SUBRETURN;
  }

/*
         If the overall scale is something strange, print warning
*/
  if(rescale>1.0e10 || rescale < 0.0){
     fprintf(stderr,__FILE__":  The value of rescale is %e\n",rescale);
    printf("The value of rescale is %e\n",rescale);
  }

/*
         find error of speed of light squared
         with coupling g^2 N and "a" as determined above.
*/
  temp=wfitting[0]*pow(rescale,2)/fdot(p1a[0],p1a[0],HINDEX);
  for(i=0; i<TOTALSTATES; i++)
    c2[i] = (spectrumvals[1][i]-spectrumvals[0][i])*temp;
  for(fitc=0; fitc<FCSQUARED; fitc++)
    fvecc[fitc] = (c2[fitc]-1.0)/c2err[fitc];

  /* If the lowest state is tachyonic, modify the associated
     c^2 expression to reflect this */

  if(spectrumvals[0][0]<0.0 || spectrumvals[0][1]<0.0){
     fprintf(stderr,__FILE__":  The lowest state has gone tachyonic!\n");
    if(spectrumvals[0][0]<spectrumvals[0][1])
      fvecc[0]=10.0*fabs(spectrumvals[0][0])/c2err[0];
    else
      fvecc[0]=10.0*fabs(spectrumvals[0][1])/c2err[0];
  }

  /* Add n=1 L nonzero value to fitting criterion.  */
  for(i=0; i<FLONG; i++){
    temp=pow(llong[i],2)+wfitting[0]*(element) 
      dot(lht[i],lht[i],HINDEX)*pow(rescale,2);
    if(temp>0.0){
      calclvalue[i]=sqrt(temp);
      lweights(weights,&lth,1,calclvalue+i);
      calclvalue[i]=fdot(weights,lfitting,lth);
      fvecc[fitc++]=(lvalue[i]-calclvalue[i])/lerr[i];
    } else {
      fvecc[fitc++]=1.0;
      *iflag = -3;
    }
  }

  /*  Consistency of longitudinal string tension with rescale */
#if FSCALE
  fvecc[fitc++]=(rescale*lfitting[0]-1.0)/lerr0;
#endif

  /* Add parity doublets to fitting criterion
     using fractional difference.   */
  /* Here we use some "model dependent" assumptions
     concerning the ordering of the states. */
#if FPARITY==2
  for(i=0; i<NSTILL && (stillbases[i]->multi!=1 || stillbases[i]->o!=1); i++);
  for(j=0; j<NSTILL && (stillbases[j]->multi!=2 || stillbases[j]->o!=-1); j++);
  if(i<NSTILL && j<NSTILL)
    fvecc[fitc++]=(stillvals[i][2]-stillvals[j][0])/
      (0.5*(stillvals[i][2]+stillvals[j][0])*parityerr[0]);
  for(i=0; i<NSTILL && (stillbases[i]->multi!=2 || stillbases[i]->o!=1); i++);
  for(j=0; j<NSTILL && (stillbases[j]->multi!=1 || stillbases[j]->o!=-1); j++);
  if(i<NSTILL && j<NSTILL)
    fvecc[fitc++]=(stillvals[i][2]-stillvals[j][0])/
      (0.5*(stillvals[i][2]+stillvals[j][0])*parityerr[1]);
#endif

  /* Add fit to Teper's spectrum */
  for(i=0; i<NSTILL; i++)
    for(k=0; k<NSTILLVALS && k<FTEPER; k++)
      fvecc[fitc++]=(rescale*stillvals[i][k]-teper[i][k])/tepererr[i][k];

  /* after including all fit criteria, make sure we 
      have actually filled the fvecc vector. */
  if(fitc!=*m){
     fprintf(stderr,__FILE__":  Error in in fcn, fitc=%i and should be %li\n",fitc,*m);
    *iflag = -13;
    return SUBRETURN;
  }

  /* check if this is the best chi^2 so far,
     if so, copy values into the structure best.
     bestpar makes sure that the unfitted variables are
     the same; that is the previous best values are
     from the same fit. */
  sum=fdot(fvecc,fvecc,*m);
  for(i=0; i<NPARAMS && fabs(bestpar[i]-par[i])<=2*ELEMENT_EPSILON; i++);
  if(*iflag>=0 && (i<NPARAMS || sum<bestchi2)){
    bestchi2=sum;
    for(i=0; i<NPARAMS; i++)bestpar[i]=par[i];
    best.pextrap=pextrap; 
    for(i=0; i<FLONG; i++){
      best.lvalue[i]=lvalue[i]; 
      best.calclvalue[i]=calclvalue[i];
    } 
    best.rescale=rescale;
    for(i=0; i<TOTALSTATES; i++){
      best.c2[i]=c2[i];
      for(j=0; j<NP1+1; j++)
	best.spectrumvals[j][i]=spectrumvals[j][i];
    }
    for(i=0; i<NSTILL; i++)
      for(j=0; j<NSTILLVALS; j++)
	best.stillvals[i][j]=stillvals[i][j];
    for(i=0; i<wth; i++)best.wfitting[i]=wfitting[i];
    for(i=0; i<lth; i++)best.lfitting[i]=lfitting[i];
  }

#if PRINTIT
  printf("  fvecc values =");
  for(i=0, k=3; i<*m; i++){
    printf(" %f",fvecc[i]);
    if((k++)%6==0 && i+1<*m)printf("\n  ");
  }
  printf("\n  Chi^2=%f in a total of %f CPU seconds\n\n",
	 fdot(fvecc,fvecc,*m),
	 (float)(clock()-tf)/(float) CLOCKS_PER_SEC);
#endif
#if PRINTIT
  fflush(stdout);
#endif
  return SUBRETURN;
}

#endif