********> bugfix 3. Author: Dan Roe Date: 29 August 2008 Programs: ptraj Description: Fixes multiple problems. 1) Ptraj did not correctly open netcdf files. 2) Ptraj misidentified Amber REMD restarts as trajectories. 3) io.c contained duplicate code in openFile() that resulted in strange behavior for bzipped files. 4) gzipped amber remd trajectories are now handled correctly. 5) remd trajectories can now have numerical extensions of arbitrary width, not just %03i. 6) Better error checking and more informative error messages for remd traj code. Affected files: ptraj.c, io.c, trajectory.h Fix: Run this patch in the $AMBERHOME/src/ptraj directory, with: $ patch -p0 -N -r patch_rejects < ------------------------------------------------------------------------------ --- ptraj.c 2008-08-22 14:20:39.000000000 -0400 +++ ptraj.c 2008-08-22 14:20:29.000000000 -0400 @@ -2081,9 +2081,7 @@ NCInfo->ncid = ncid; } else #endif - - err = openFile(&fp, filename, "r"); - if ( err == 0 ) { + if ( openFile(&fp, filename, "r") == 0 ) { fprintf(stdout, "trajin %s ignored; could not open file (%s)\n", filename, filename); safe_free(trajInfo->filename); diff -Naur io.c io.c --- io.c 2008-08-29 11:31:32.000000000 -0400 +++ io.c 2008-08-29 11:31:03.000000000 -0400 @@ -658,6 +658,7 @@ /* * Open file specified by filename with "mode" to the file pointer * fpp + * Returns 0 if an error occured, otherwise returns 1 */ int @@ -721,26 +722,9 @@ sprintf(buffer, "gzip >> %s", filename); else is_compressed = NULL; - } - - if (is_compressed != NULL) { - - /* - * trajectory is compressed, use popen() - */ - - if (prnlev > 2) - fprintf(stdout, "Opening compressed file: %s\n", buffer); - if ( ( *fpp = safe_popen(buffer, mode) ) == NULL ) { - if (prnlev > 4) - fprintf(stdout, "\nCould not open compressed file (%s) with mode (%s)\n", - buffer, mode); - return 0; - } } else if ( (is_compressed = strstr(filename, ".bz2")) != NULL && - (is_compressed[4] == (char) 0 || isspace(is_compressed[4])) ) - + (is_compressed[4] == (char) 0 || isspace(is_compressed[4])) ) { if (mode[0] == 'r') sprintf(buffer, "bunzip2 -c %s", filename); else if (mode[0] == 'w') @@ -749,16 +733,17 @@ sprintf(buffer, "bzip2 >> %s", filename); else is_compressed = NULL; - + } + if (is_compressed != NULL) { - + /* * trajectory is compressed, use popen() */ - + if (prnlev > 2) fprintf(stdout, "Opening compressed file: %s\n", buffer); - + if ( ( *fpp = safe_popen(buffer, mode) ) == NULL ) { if (prnlev > 4) fprintf(stdout, "Could not open compressed file (%s) with mode (%s)\n", diff -Naur ptraj.c ptraj.c --- ptraj.c 2008-08-29 11:31:15.000000000 -0400 +++ ptraj.c 2008-08-29 11:31:03.000000000 -0400 @@ -2190,11 +2190,13 @@ } /* - * Can we scan in three numbers from the first line? If so this is + * DAN ROE - Modified so that ptraj will recognize restarts from + * REMD, which have 3 numbers in the first line. + * Can we scan in four numbers from the first line? If so this is * an AMBER trajectory otherwise assume it is an AMBER restrt file */ if ( type == COORD_UNKNOWN ) { - if ( (i = sscanf(buffer2, "%f%f%f", &junk1, &junk2, &junk3)) == 3 ) + if ( (i = sscanf(buffer2, "%f%f%f%f", &junk1, &junk2, &junk3, &junk4)) == 4 ) type = COORD_AMBER_TRAJECTORY; else type = COORD_AMBER_RESTART; @@ -3380,8 +3382,8 @@ case COORD_AMBER_REMD: fprintf(stdout, " File (%s) is an AMBER REMD (new format) trajectory%s", p->filename, (p->isBox ? " (with box info)" : "")); - fprintf(stdout,", %i files total (First index is %03i), ",p->numREMDTRAJ, - p->firstREMDTRAJ); + fprintf(stdout,", %i files total (First index is %0*i), ",p->numREMDTRAJ, + p->EXTwidth, p->firstREMDTRAJ); if (p->isREMDTRAJ>0) fprintf(stdout,"frames at %lf K will be used, ",p->remdtrajtemp); else @@ -3730,8 +3732,9 @@ */ fprintf(stdout, "\nProcessing AMBER REMD trajectory (new format) "); if (currentCoordinateInfo->isREMDTRAJ>0) { - fprintf(stdout, "files %s -> %s%03i\n", + fprintf(stdout, "files %s -> %s%0*i\n", currentCoordinateInfo->filename,currentCoordinateInfo->baseFilename, + currentCoordinateInfo->EXTwidth, currentCoordinateInfo->firstREMDTRAJ+(currentCoordinateInfo->numREMDTRAJ-1)); /* * Now open up the rest of the REMD traj files @@ -3744,9 +3747,11 @@ /*NOTE: Currently spot 0 is not used; inefficient */ for (i=1; inumREMDTRAJ; i++) { if (currentCoordinateInfo->isREMDTRAJ==1) - sprintf(repFilename,"%s%03i",currentCoordinateInfo->baseFilename,j++); + sprintf(repFilename,"%s%0*i",currentCoordinateInfo->baseFilename, + currentCoordinateInfo->EXTwidth,j++); else if (currentCoordinateInfo->isREMDTRAJ==2) - sprintf(repFilename,"%s%03i.gz",currentCoordinateInfo->baseFilename,j++); + sprintf(repFilename,"%s%0*i.gz",currentCoordinateInfo->baseFilename, + currentCoordinateInfo->EXTwidth,j++); fprintf (stdout," Opening %s\n",repFilename); if (openFile(currentCoordinateInfo->remdtrajfiles+i,repFilename,"r")==0) { @@ -4014,12 +4019,10 @@ currentRep=currentCoordinateInfo->remdtrajfiles[i]; /* * Read in REMD header line + * */ - fscanf(currentRep,"%*s"); /*REMD*/ - fscanf(currentRep,"%*s"); /*Replica #*/ - fscanf(currentRep,"%*s"); /*exchange #*/ - fscanf(currentRep,"%*s"); /*step #*/ - fscanf(currentRep,"%lf",&repTemp); /*temp0*/ + fscanf(currentRep,"%*s %*s %*s %*s %lf",&repTemp); + /*fprintf(stdout,"REMD DEBUG: Rep%03i temp= %lf\n",i,repTemp); */ /* @@ -4053,7 +4056,12 @@ safe_free(currentCoordinateInfo->baseFilename); safe_fclose(currentCoordinateInfo->file); currentCoordinateInfo->file = NULL; - fprintf(stdout,"DAN DEBUG: Final repTemp value read= %lf\n",repTemp); + fprintf(stdout,"\nREMDTRAJ: Final repTemp value read= %lf, set %i\n",repTemp,set); + fprintf(stdout,"Could not find target %lf in any of the replica trajectories.\n", + currentCoordinateInfo->remdtrajtemp); + fprintf(stdout,"Check that all replica trajectory files were found and that\n"); + fprintf(stdout, + "none of the trajectories are corrupted (e.g. missing a temperature).\n"); error(ROUTINE, "Target replica temperature not found in traj!"); return; } @@ -4071,7 +4079,7 @@ } *processCoordinates = 1; } - /* cleanup if necessary */ + /* REMDTRAJ: cleanup if necessary */ /* NOTES: Should this go somewhere else? */ if ( *readCoordinates == 0 ) { *processCoordinates = 0; @@ -5746,7 +5754,6 @@ info->isREMDTRAJ=argumentStackContains(&argumentStack,"remdtraj"); info->remdtrajtemp=argumentStackKeyToDouble(&argumentStack, "remdtrajtemp", 0.0); - /*fprintf(stdout," REMD DEBUG: Frames at %lf K will be processed.\n",info->remdtrajtemp);*/ /* DAN ROE: If the coordinate type is COORD_AMBER_REMD (new remd format) * and remdtraj was specified in the trajin, we need to look * for and open any additional traj files. @@ -5755,91 +5762,125 @@ fprintf(stdout," REMDTRAJ: Frames at %lf K will be processed.\n",info->remdtrajtemp); /* * Scan for additional REMD traj files. - * Assume the extension of given trajectory is in the format %03i - * and represents the lowest replica, and that the other files are in - * sequence (i.e. 000, 001, 002 etc). - * - * Get filename 3 char extension - this is assumed to be the first replica + * Assume the extension of given trajectory is the number + * of the lowest replica, and that the other files are in + * sequence (e.g. rem.000, rem.001, rem.002 etc). */ - i=strlen(filename); - EXT=(char*) safe_malloc(4*sizeof(char)); - EXT[0]=filename[i-3]; - EXT[1]=filename[i-2]; - EXT[2]=filename[i-1]; - EXT[3]='\0'; - + loop=strlen(filename); /* Was the file zipped? Currently only the .gz extension is looked for */ - if (strcmp(EXT,".gz")==0) { + if ( (filename[loop-3]=='.')&&(filename[loop-2]=='g')&&(filename[loop-1]=='z') ) { fprintf(stdout," REMDTRAJ: File is gzipped.\n"); info->isREMDTRAJ=2; - EXT[0]=filename[i-6]; - EXT[1]=filename[i-5]; - EXT[2]=filename[i-4]; - EXT[3]='\0'; + loop=loop-3; } - /* Check that extension is a number */ - for (j=0; j<2; j++) { + /* First, find location of last '.' and store it in i*/ + for (j=0; j0) { + fprintf(stdout," REMDDEBUG: Last . in %s located at %i\n",filename,i); + fprintf(stdout," REMDDEBUG: Allocating %i for extension\n",loop-i); + fprintf(stdout," REMDDEBUG: EXTwidth=%i\n",loop-i-1); + } + + /* Get filename extension */ + EXT=(char*) safe_malloc((loop-i)*sizeof(char)); + info->EXTwidth=loop-i-1; + k=0; + for (j=i+1; j0) fprintf(stdout, " REMDDEBUG: Replica extension is %s\n",EXT); + + /* Check that all digits in extension are numbers */ + for (j=0; j57) ) { - fprintf(stdout,"WARNING: REMDTRAJ: Extension is not a number! %s\n",EXT); + fprintf(stdout, + "REMDTRAJ: WARNING: Character #%i (%c) in extension %s is not a number!\n", + j,EXT[j],EXT); fprintf(stdout," Trajectory will be processed as replica traj.\n"); safe_free(EXT); info->isREMDTRAJ=0; } } - /* Look for the other replica files, assuming the name is basefilename.XXX or - * basefilename.XXX.gz + /* Look for the other replica files, assuming the name is basefilename.num or + * basefilename.num.gz */ if (info->isREMDTRAJ>0) { j=atoi(EXT); - if (prnlev>0) fprintf(stderr,"REMD DEBUG: index of first replica = %i\n",j); + if (prnlev>0) fprintf(stderr," REMDDEBUG: index of first replica = %i\n",j); safe_free(EXT); info->firstREMDTRAJ=j; + /* - * allocate memory for the filename + * Allocate memory for the replica filenames. */ - info->baseFilename=(char*) safe_malloc((i+1)*sizeof(char)); - repFilename=(char*) safe_malloc((i+1)*sizeof(char)); + k=strlen(filename); + info->baseFilename=(char*) safe_malloc((k+1)*sizeof(char)); + repFilename=(char*) safe_malloc((k+1)*sizeof(char)); + /* - * store base filename + * Store base filename. + * Variable 'i' still contains location of '.' before # extension + */ + strncpy(info->baseFilename,filename,i+1); + if (prnlev>0) fprintf(stderr," REMDDEBUG: base filename = %s\n",info->baseFilename); + + /* + * Search for a replica number lower than this. Correct functioning + * of the replica code requires the file specified by trajin be the + * lowest # replica. */ if (info->isREMDTRAJ==1) - strncpy(info->baseFilename,filename,i-3); + sprintf(repFilename,"%s%0*i",info->baseFilename,info->EXTwidth,j-1); else if (info->isREMDTRAJ==2) - strncpy(info->baseFilename,filename,i-6); + sprintf(repFilename,"%s%0*i.gz",info->baseFilename,info->EXTwidth,j-1); + if ( (REMDTRAJFILE=fopen(repFilename,"r"))!=NULL ) { + fprintf(stdout, + " REMDTRAJ: WARNING: Replica# found lower than file specified with trajin!\n"); + fprintf(stdout, " (Found %s)\n",repFilename); + fprintf(stdout, " trajin remdtraj requires lowest # replica!\n"); + fclose(REMDTRAJFILE); + } - if (prnlev>0) fprintf(stderr,"REMD DEBUG: base filename = %s\n",info->baseFilename); /* * Now count REMD files */ - i=1; /* file pointer index */ - loop=1; - j++; /* replica file index, start at next replica file */ + i=1; /* # of replica files counted. */ + loop=1; /* Continue loop */ + j++; /* Replica file index, start at next replica file */ fprintf(stderr," REMDTRAJ: Scanning for other REMD files.\n"); while ( loop == 1 ) { /* * filename to scan for */ if (info->isREMDTRAJ==1) - sprintf(repFilename,"%s%03i",info->baseFilename,j); + sprintf(repFilename,"%s%0*i",info->baseFilename,info->EXTwidth,j); else if (info->isREMDTRAJ==2) - sprintf(repFilename,"%s%03i.gz",info->baseFilename,j); + sprintf(repFilename,"%s%0*i.gz",info->baseFilename,info->EXTwidth,j); - if ( openFile(&REMDTRAJFILE,repFilename,"r") == 0 ) { + /* DAN ROE - For some reason popen() called by openFile does not + * return NULL if a gzipped file does not exist. Use + * fopen() here for now since we dont need to do any + * reading yet anyway. + */ + if ( (REMDTRAJFILE=fopen(repFilename,"r"))==NULL ) { loop=0; - if (prnlev>0) fprintf(stderr,"REMD DEBUG: %s not found, exiting loop.\n",repFilename); + if (prnlev>0) fprintf(stderr," REMDDEBUG: %s not found, exiting loop.\n",repFilename); } else { i++; j++; - if (prnlev>0) fprintf(stderr," REMD DEBUG: Found %s\n",repFilename); - safe_fclose(REMDTRAJFILE); + if (prnlev>0) fprintf(stderr," REMDDEBUG: Found %s\n",repFilename); + fclose(REMDTRAJFILE); } - } + } /* End loop to find replica trajs */ fprintf(stderr," REMDTRAJ: Found %i replica traj files.\n",i); info->numREMDTRAJ=i; safe_free(repFilename); } - } + } /* END AMBER REPLICA TRAJECTORY SETUP */ /* * set start, stop and offset if they were specified and relevent diff -Naur trajectory.h trajectory.h --- trajectory.h 2008-08-29 11:31:24.000000000 -0400 +++ trajectory.h 2008-08-29 11:31:03.000000000 -0400 @@ -107,11 +107,15 @@ int nlescopy; LesStatus les_status; /* - * REMD info + * REMD Trajectory info */ - int isREMDTRAJ; + int isREMDTRAJ; /* 0 - Process as normal trajectory, + * >0 - Process as replica trajectory, + * 2 - Files have .gz extension + */ int numREMDTRAJ; /* How many replica trajectories are present */ int firstREMDTRAJ; /* Index of first replica */ + int EXTwidth; /* Length of replica extension */ FILE** remdtrajfiles; /* To hold replica trajectory files */ char* baseFilename; /* To hold replica traj base filename */ int linesperset; /* for fast scanthrough of other REMD files */ --------------------------------------------------------------------------- Workarounds: none.