*******> update.1

Author: Charles Lin

Date: May 5, 2016

Programs: pmemd, pmemd.cuda, pmemd.cuda.MPI

Description: Adds support for GPU External Electric Fields:
             1) Adds support for external electric fields on GPU (serial + MPI)
             2) Cleans up some extraneous code in serial electric fields
             3) Adds test cases for GPU electric fields

--------------------------------------------------------------------------------
 src/pmemd/src/cuda/Makefile                      |   5 +-
 src/pmemd/src/cuda/Makefile.advanced             |   3 +-
 src/pmemd/src/cuda/gpu.cpp                       |  37 +-
 src/pmemd/src/cuda/gpu.h                         |   4 +
 src/pmemd/src/cuda/gputypes.cpp                  |   7 +
 src/pmemd/src/cuda/gputypes.h                    |  15 +-
 src/pmemd/src/cuda/kCalculateEFieldEnergy.cu     |  80 +++++
 src/pmemd/src/cuda/kEFE.h                        |  84 +++++
 src/pmemd/src/cuda/kForcesUpdate.cu              |   2 +-
 src/pmemd/src/cuda/kU.h                          |   2 +-
 src/pmemd/src/get_efield_energy.F90              |  16 +-
 src/pmemd/src/pme_alltasks_setup.F90             |   3 +-
 src/pmemd/src/pme_force.F90                      |  10 +-
 test/cuda/4096wat/Run.pure_wat_efield            |  69 ++++
 test/cuda/4096wat/mdout.pure_wat_efield.GPU_DPFP | 407 ++++++++++++++++++++++
 test/cuda/4096wat/mdout.pure_wat_efield.GPU_SPFP | 413 ++++++++++++++++++++++
 test/cuda/4096wat/mdout.pure_wat_efield.GPU_SPXP | 420 +++++++++++++++++++++++
 test/cuda/Makefile                               |   1 +
 18 files changed, 1547 insertions(+), 31 deletions(-)

diff --git src/pmemd/src/cuda/Makefile src/pmemd/src/cuda/Makefile
index 74a8100..5e89499 100644
--- src/pmemd/src/cuda/Makefile
+++ src/pmemd/src/cuda/Makefile
@@ -5,7 +5,8 @@ include ../../../config.h
 CU_OBJS = cuda_info.o gpu.o gputypes.o \
           kForcesUpdate.o kCalculateLocalForces.o kCalculateGBBornRadii.o \
           kCalculatePMENonbondEnergy.o kCalculateGBNonbondEnergy1.o kNLRadixSort.o \
-          kCalculateGBNonbondEnergy2.o kShake.o kNeighborList.o kPMEInterpolation.o
+          kCalculateGBNonbondEnergy2.o kShake.o kNeighborList.o kPMEInterpolation.o \
+          kCalculateEFieldEnergy.o

 .SUFFIXES: .F90 .cpp .cu .o

@@ -22,6 +23,7 @@ kNeighborList.cu.CU_FLAGS =
 kCalculatePMENonbondEnergy.cu.CU_FLAGS =
 kCalculateAMDWeights.cu.CU_FLAGS =
 kCalculateGAMDWeights.cu.CU_FLAGS =
+kCalculateEFieldEnergy.cu.CU_FLAGS =

 cuda.a: $(CU_OBJS)
 	ar rvs cuda.a $(CU_OBJS)
@@ -50,6 +52,7 @@ kCalculateGBNonbondEnergy2.o: kCalculateGBNonbondEnergy2.h
 kCalculateGBBornRadii.o: kCalculateGBBornRadii.h
 kCalculateLocalForces.o: kCCF.h kCLF.h kCNF.h kCLFdih.h kCLFdihGaMD.h
 kPMEInterpolation.o: kPGGW.h kPGS.h kPSSE.h
+kCalculateEFieldEnergy.o: kEFE.h
 kShake.o: kShake.h

 $(CU_OBJS): gpu.h gputypes.h
diff --git src/pmemd/src/cuda/Makefile.advanced src/pmemd/src/cuda/Makefile.advanced
index aded597..7706edd 100644
--- src/pmemd/src/cuda/Makefile.advanced
+++ src/pmemd/src/cuda/Makefile.advanced
@@ -6,7 +6,7 @@ CU_OBJS = cuda_info.o gpu.o gputypes.o \
           kForcesUpdate.o kCalculateLocalForces.o kCalculateGBBornRadii.o \
           kCalculatePMENonbondEnergy.o radixsort.o radixsort_c.o \
           kCalculateGBNonbondEnergy1.o kCalculateGBNonbondEnergy2.o \
-          kShake.o kNeighborList.o kPMEInterpolation.o \
+          kShake.o kNeighborList.o kPMEInterpolation.o kCalculateEFieldEnergy.o \
           cudpp_scan.o cudpp_scan_c.o

 .SUFFIXES: .fpp .cpp .cu .o

@@ -21,6 +21,7 @@ kShake.cu.CU_FLAGS =
 kPMEInterpolation.cu.CU_FLAGS =
 kNeighborList.cu.CU_FLAGS =
 kCalculatePMENonbondEnergy.cu.CU_FLAGS =
+kCalculateEFieldEnergy.cu.CU_FLAGS =
 cudpp_scan.cu.CU_FLAGS =

 cuda.a: $(CU_OBJS)
diff --git src/pmemd/src/cuda/gpu.cpp src/pmemd/src/cuda/gpu.cpp
index 34828b5..0fdd5a4 100644
--- src/pmemd/src/cuda/gpu.cpp
+++ src/pmemd/src/cuda/gpu.cpp
@@ -3329,7 +3329,8 @@ PRINTMETHOD("gpu_create_outputbuffers");
     gpu->sim.pENMRr6avDistance = gpu->sim.pEnergyBuffer + 
14; gpu->sim.pENMRAngle = gpu->sim.pEnergyBuffer + 15; gpu->sim.pENMRTorsion = gpu->sim.pEnergyBuffer + 16; - gpu->sim.pESurf = gpu->sim.pEnergyBuffer + 17; +// gpu->sim.pESurf = gpu->sim.pEnergyBuffer + 17; //Surf seems to be surface area term for GBSA. Does not seem to be coded in for CUDA. + gpu->sim.pEEField = gpu->sim.pEnergyBuffer + 17; gpu->sim.pVirial = gpu->sim.pEnergyBuffer + VIRIALOFFSET; gpu->sim.pVirial_11 = gpu->sim.pEnergyBuffer + VIRIALOFFSET; gpu->sim.pVirial_22 = gpu->sim.pEnergyBuffer + VIRIALOFFSET + 1; @@ -3427,6 +3428,7 @@ PRINTMETHOD("gpuCopyConstants"); } SetkPMEInterpolationSim(gpu); SetkNeighborListSim(gpu); + SetkCalculateEFieldEnergySim(gpu); SetkCalculatePMENonbondEnergySim(gpu); } } @@ -5121,7 +5123,7 @@ PRINTMETHOD("gpu_pme_ntp_setup"); #endif } -extern "C" void gpu_pme_alltasks_setup_(int* nfft1, int* nfft2, int* nfft3, double* prefac1, double* prefac2, double* prefac3, double* ew_coeff, int* ips, double* fswitch) +extern "C" void gpu_pme_alltasks_setup_(int* nfft1, int* nfft2, int* nfft3, double* prefac1, double* prefac2, double* prefac3, double* ew_coeff, int* ips, double* fswitch, double* efx, double* efy, double* efz, int* efn, double* efphase, double* effreq) { PRINTMETHOD("gpu_pme_alltasks_setup"); @@ -5139,6 +5141,12 @@ PRINTMETHOD("gpu_pme_alltasks_setup"); int n1 = ((*nfft1 + 1) + PADDING) & PADDINGMASK; int n2 = ((*nfft2 + 1) + PADDING) & PADDINGMASK; int n3 = ((*nfft3 + 1) + PADDING) & PADDINGMASK; + gpu->sim.efx = *efx; + gpu->sim.efy = *efy; + gpu->sim.efz = *efz; + gpu->sim.efn = *efn; + gpu->sim.efphase = *efphase; + gpu->sim.effreq = *effreq; gpu->sim.fswitch = *fswitch; gpu->sim.fswitch2 = gpu->sim.fswitch * gpu->sim.fswitch; gpu->sim.fswitch3 = gpu->sim.fswitch * gpu->sim.fswitch2; @@ -5611,7 +5619,7 @@ PRINTMETHOD("gpu_allreduce"); } #endif -extern "C" void gpu_pme_ene_(double* ewaldcof, double* vol, pme_pot_ene_rec* pEnergy, double enmr[3], double virial[3], double ekcmt[3]) +extern "C" void gpu_pme_ene_(double* ewaldcof, double* vol, pme_pot_ene_rec* pEnergy, double enmr[3], double virial[3], double ekcmt[3], int* nstep, double* dt) { PRINTMETHOD("gpu_pme_ene"); // Rebuild neighbor list @@ -5673,6 +5681,12 @@ PRINTMETHOD("gpu_pme_ene"); kCalculatePMENonbondEnergy(gpu); } + // Electric Field Energy + if (gpu->sim.efx != 0 || gpu->sim.efy != 0 || gpu->sim.efz != 0) + { + SetkCalculateEFieldEnergySim(gpu); + kCalculateEFieldEnergy(gpu, *nstep, *dt); + } if (gpu->sim.EPs > 0) kOrientForces(gpu); @@ -5777,8 +5791,8 @@ PRINTMETHOD("gpu_pme_ene"); energy[i] = (PMEDouble)val / ENERGYSCALE; } pEnergy->total += energy[i]; - //printf("pre-virial %6d %16.7f\n", i, energy[i]); - //printf("%06d %6d %16.7f\n", gpu->gpuID, i, energy[i]); +// printf("pre-virial %6d %16.7f\n", i, energy[i]); +// printf("%06d %6d %16.7f\n", gpu->gpuID, i, energy[i]); } for (int i = VIRIALOFFSET; i < ENERGYTERMS; i++) { @@ -5835,6 +5849,7 @@ PRINTMETHOD("gpu_pme_ene"); enmr[0] = energy[14]; enmr[1] = energy[15]; enmr[2] = energy[16]; + pEnergy->efield = energy[17]; // Grab virial if needed if ((gpu->sim.ntp > 0) && (gpu->sim.barostat == 1)) { @@ -5867,7 +5882,7 @@ PRINTMETHOD("gpu_pme_ene"); #endif } -extern "C" void gpu_pme_force_(double* ewaldcof, double* vol, double virial[3], double ekcmt[3]) +extern "C" void gpu_pme_force_(double* ewaldcof, double* vol, double virial[3], double ekcmt[3], int nstep, double dt) { PRINTMETHOD("gpu_pme_force"); // Rebuild neighbor list @@ -5930,6 +5945,13 @@ PRINTMETHOD("gpu_pme_force"); kCalculatePMENonbondForces(gpu); } + // Electric 
Field Forces + if (gpu->sim.efx != 0 || gpu->sim.efy != 0 || gpu->sim.efz != 0) + { + SetkCalculateEFieldEnergySim(gpu); + kCalculateEFieldForces(gpu, nstep, dt); + } + if (gpu->sim.EPs > 0) kOrientForces(gpu); } @@ -6072,7 +6094,7 @@ PRINTMETHOD("gpu_ips_ene"); pEnergy->elec_tot = energy[10]; else #endif - pEnergy->elec_tot = energy[10] + gpu->sim.EIPSEL + gpu->sim.eipssel; + pEnergy->elec_tot = energy[10] + gpu->sim.EIPSEL + gpu->sim.eipssel; pEnergy->elec_dir = pEnergy->elec_tot; pEnergy->elec_recip = 0.0; pEnergy->elec_nb_adjust = 0.0; @@ -6089,6 +6111,7 @@ PRINTMETHOD("gpu_ips_ene"); enmr[0] = energy[14]; enmr[1] = energy[15]; enmr[2] = energy[16]; + pEnergy->efield = energy[17]; // Grab virial if needed if ((gpu->sim.ntp > 0) && (gpu->sim.barostat == 1)) diff --git src/pmemd/src/cuda/gpu.h src/pmemd/src/cuda/gpu.h index 1a794b9..ce7d06a 100644 --- src/pmemd/src/cuda/gpu.h +++ src/pmemd/src/cuda/gpu.h @@ -96,6 +96,8 @@ extern "C" void kCalculateLocalForcesInitKernels(gpuContext gpu); extern "C" void kShakeInitKernels(gpuContext gpu); extern "C" void SetkForcesUpdateSim(gpuContext gpu); extern "C" void GetkForcesUpdateSim(gpuContext gpu); +extern "C" void SetkCalculateEFieldEnergySim(gpuContext gpu); +extern "C" void GetkCalculateEFieldEnergySim(gpuContext gpu); extern "C" void SetkCalculateLocalForcesSim(gpuContext gpu); extern "C" void GetkCalculateLocalForcesSim(gpuContext gpu); extern "C" void SetkCalculateGBBornRadiiSim(gpuContext gpu); @@ -147,6 +149,8 @@ extern "C" void kRelaxMDUpdate(gpuContext gpu, PMEDouble dt, PMEDouble temp0, PM extern "C" void kShake(gpuContext gpu); extern "C" void kFastShake(gpuContext gpu); extern "C" void kCalculateKineticEnergy(gpuContext gpu, PMEFloat c_ave); +extern "C" void kCalculateEFieldForces(gpuContext gpu, int nstep, double dt); +extern "C" void kCalculateEFieldEnergy(gpuContext gpu, int nstep, double dt); extern "C" void kCalculateCOM(gpuContext gpu); extern "C" void kCalculateSoluteCOM(gpuContext gpu); extern "C" void kReduceSoluteCOM(gpuContext gpu); diff --git src/pmemd/src/cuda/gputypes.cpp src/pmemd/src/cuda/gputypes.cpp index 0467f3a..291d78b 100644 --- src/pmemd/src/cuda/gputypes.cpp +++ src/pmemd/src/cuda/gputypes.cpp @@ -30,6 +30,13 @@ void clearCudaSimulation(cudaSimulation& sim) sim.scee = (1.0 / 1.2); sim.cut = 8.0; sim.cut2 = sim.cut * sim.cut; + sim.fswitch = -1; + sim.efx = 0; + sim.efy = 0; + sim.efz = 0; + sim.efn = 0; + sim.efphase = 0; + sim.effreq = 0; sim.skinnb = 2.0f; sim.dielc = 1.0; sim.tol = 0.0001; diff --git src/pmemd/src/cuda/gputypes.h src/pmemd/src/cuda/gputypes.h index 9ba868b..d7a76e2 100644 --- src/pmemd/src/cuda/gputypes.h +++ src/pmemd/src/cuda/gputypes.h @@ -167,10 +167,10 @@ enum { NLEXCLUSIONSHIFT = 8, NLEXCLUSIONATOMMASK = ((1 << NLEXCLUSIONSHIFT) - 1), - VIRIALOFFSET = 18, + VIRIALOFFSET = 19, AMDEDIHEDRALOFFSET = 25, GAMDEDIHEDRALOFFSET = 26, - ENERGYTERMS = 27, + ENERGYTERMS = 28, TI_ENERGYTERMS = ENERGYTERMS * 3, PADDING = 16, PADDINGMASK = 0xfffffff0, @@ -442,6 +442,8 @@ struct pme_pot_ene_rec double cmap; double amd_boost; double gamd_boost; + double emap; + double efield; }; struct NTPData @@ -697,6 +699,12 @@ struct cudaSimulation { PMEFloat cut3invcut3minfswitch3; // VDW force switch constant cut3 / (cut3-fswitch3) PMEFloat cutPlusSkin; // Nonbond interaction cutooff plus skin PMEFloat cutPlusSkin2; // Nonbond interaction cutooff plus skin squared + int efn; // Normalize electric field vectors + PMEFloat efx; // Electric field x vector + PMEFloat efy; // Electric field y vector + PMEFloat efz; 
// Electric field z vector
+    PMEFloat efphase;                            // Electric field spatial phase
+    PMEFloat effreq;                             // Electric field time frequency
     double dielc;                                // Dielectric constant
     double gamma_ln;                             // Langevin integration parameter
     double c_ave;                                // Langevin integration parameter
@@ -1151,7 +1159,8 @@ struct cudaSimulation {
     unsigned long long int* pENMRr6avDistance;   // Pointer to NMR r6av distance energy
     unsigned long long int* pENMRAngle;          // Pointer to NMR angle energy
     unsigned long long int* pENMRTorsion;        // Pointer to NMR torsion energy
-    unsigned long long int* pESurf;              // Pointer to GBSA surface energy
+//  unsigned long long int* pESurf;              // Pointer to GBSA surface energy
+    unsigned long long int* pEEField;            // Pointer to Electric Field energy
     unsigned long long int* pVirial;             // Pointer to PME virial
     unsigned long long int* pVirial_11;          // Pointer to PME virial component
     unsigned long long int* pVirial_22;          // Pointer to PME virial component
diff --git src/pmemd/src/cuda/kCalculateEFieldEnergy.cu src/pmemd/src/cuda/kCalculateEFieldEnergy.cu
new file mode 100644
index 0000000..cd5224d
--- /dev/null
+++ src/pmemd/src/cuda/kCalculateEFieldEnergy.cu
@@ -0,0 +1,80 @@
+#include "copyright.i"
+
+/***************************************************/
+/*                                                 */
+/*      AMBER NVIDIA CUDA GPU IMPLEMENTATION       */
+/*                 PMEMD VERSION                   */
+/*                    Feb 2014                     */
+/*                       by                        */
+/*                 Scott Le Grand                  */
+/*                      and                        */
+/*                 Ross C. Walker                  */
+/*                                                 */
+/***************************************************/
+
+#include <cuda.h>
+#include "gpu.h"
+#include "ptxmacros.h"
+//#include "cuda_profiler_api.h"
+
+//#define PME_ENERGY
+
+static __constant__ cudaSimulation cSim;
+
+void SetkCalculateEFieldEnergySim(gpuContext gpu)
+{
+    cudaError_t status;
+    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaSimulation));
+    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
+}
+
+void GetkCalculateEFieldEnergySim(gpuContext gpu)
+{
+    cudaError_t status;
+    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaSimulation));
+    RTERROR(status, "cudaMemcpyFromSymbol: GetSim copy from cSim failed");
+}
+
+
+// EField kernels
+
+#define PME_ENERGY
+
+__global__ void
+#if (__CUDA_ARCH__ >= 300)
+__launch_bounds__(SM_3X_UPDATE_THREADS_PER_BLOCK, 1)
+#else
+__launch_bounds__(SM_2X_UPDATE_THREADS_PER_BLOCK, 1)
+#endif
+kCalculateEFieldEnergy_kernel(PMEDouble nstep, PMEDouble dt)
+#include "kEFE.h"
+
+#undef PME_ENERGY
+
+__global__ void
+#if (__CUDA_ARCH__ >= 300)
+__launch_bounds__(SM_3X_UPDATE_THREADS_PER_BLOCK, 1)
+#else
+__launch_bounds__(SM_2X_UPDATE_THREADS_PER_BLOCK, 1)
+#endif
+kCalculateEFieldForces_kernel(PMEDouble nstep, PMEDouble dt)
+#include "kEFE.h"
+
+extern "C" void kCalculateEFieldForces(gpuContext gpu, int nstep, PMEDouble dt)
+{
+    kCalculateEFieldForces_kernel<<<gpu->updateBlocks, gpu->updateThreadsPerBlock>>>((PMEDouble)nstep, dt);
+    LAUNCHERROR("kCalculateEFieldForces");
+}
+
+
+extern "C" void kCalculateEFieldEnergy(gpuContext gpu, int nstep, PMEDouble dt)
+{
+    kCalculateEFieldEnergy_kernel<<<gpu->updateBlocks, gpu->updateThreadsPerBlock>>>((PMEDouble)nstep, dt);
+    LAUNCHERROR("kCalculateEFieldEnergy");
+}
+
+extern "C" void kCalculateEFieldEnergyInitKernels(gpuContext gpu)
+{
+    cudaFuncSetSharedMemConfig(kCalculateEFieldEnergy_kernel, cudaSharedMemBankSizeEightByte);
+    cudaFuncSetSharedMemConfig(kCalculateEFieldForces_kernel, cudaSharedMemBankSizeEightByte);
+}
diff --git src/pmemd/src/cuda/kEFE.h src/pmemd/src/cuda/kEFE.h
new file mode 100644
index 0000000..2c4a6ca
--- /dev/null
+++ src/pmemd/src/cuda/kEFE.h
@@ -0,0 +1,84 @@
+#include "copyright.i"
+
+/***************************************************/ +/* */ +/* AMBER NVIDIA CUDA GPU IMPLEMENTATION */ +/* PMEMD VERSION */ +/* Feb 2014 */ +/* by */ +/* Scott Le Grand */ +/* and */ +/* Ross C. Walker */ +/* */ +/***************************************************/ + +{ +// #defines: PME_ENERGY, NEIGHBOR_LIST + + // Precompute Electric Field Constants + PMEFloat phase = cos((2*PI*cSim.effreq/1000)*((PMEFloat)dt*(PMEFloat)nstep)-PI/180*cSim.efphase); + PMEFloat loc_efx = phase * (PMEFloat)cSim.efx; + PMEFloat loc_efy = phase * (PMEFloat)cSim.efy; + PMEFloat loc_efz = phase * (PMEFloat)cSim.efz; +#ifdef PME_ENERGY + PMEForce sEEField = (PMEForce)0; +#endif + + unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int increment = gridDim.x * blockDim.x; + unsigned int imgPos = cSim.pImageAtomLookup[pos]; + + if (cSim.efn == 1) + { + loc_efx *= (PMEFloat)cSim.recip[0][0]; + loc_efy *= (PMEFloat)cSim.recip[1][1]; + loc_efz *= (PMEFloat)cSim.recip[2][2]; + } + + if(pos < cSim.atoms) + { + //Convert internal charge to electron charge + PMEDouble electron_charge = (PMEDouble)cSim.pImageCharge[imgPos] / (PMEDouble)18.2223; + + PMEDouble ef_frcx = electron_charge * loc_efx; + PMEDouble ef_frcy = electron_charge * loc_efy; + PMEDouble ef_frcz = electron_charge * loc_efz; + +#ifdef use_SPFP + atomicAdd((unsigned long long int*)&cSim.pNBForceXAccumulator[imgPos], llitoulli(ef_frcx * FORCESCALEF)); + atomicAdd((unsigned long long int*)&cSim.pNBForceYAccumulator[imgPos], llitoulli(ef_frcy * FORCESCALEF)); + atomicAdd((unsigned long long int*)&cSim.pNBForceZAccumulator[imgPos], llitoulli(ef_frcz * FORCESCALEF)); +#elif defined(use_SPXP) + atomicAdd((unsigned long long int*)&cSim.pNBForceXAccumulator[imgPos], llitoulli(fast_llrintf(ef_frcx * FORCESCALEF))); + atomicAdd((unsigned long long int*)&cSim.pNBForceYAccumulator[imgPos], llitoulli(fast_llrintf(ef_frcy * FORCESCALEF))); + atomicAdd((unsigned long long int*)&cSim.pNBForceZAccumulator[imgPos], llitoulli(fast_llrintf(ef_frcz * FORCESCALEF))); +#else + atomicAdd((unsigned long long int*)&cSim.pNBForceXAccumulator[imgPos], llitoulli(llrint((PMEForce)ef_frcx * FORCESCALE))); + atomicAdd((unsigned long long int*)&cSim.pNBForceYAccumulator[imgPos], llitoulli(llrint((PMEForce)ef_frcy * FORCESCALE))); + atomicAdd((unsigned long long int*)&cSim.pNBForceZAccumulator[imgPos], llitoulli(llrint((PMEForce)ef_frcz * FORCESCALE))); +#endif + +#ifdef PME_ENERGY + PMEDouble AtomX = (PMEDouble)cSim.pImageX[imgPos]; + PMEDouble AtomY = (PMEDouble)cSim.pImageY[imgPos]; + PMEDouble AtomZ = (PMEDouble)cSim.pImageZ[imgPos]; + PMEDouble ef_vx = AtomX - (PMEDouble)cSim.ucell[0][0]; + PMEDouble ef_vy = AtomY - (PMEDouble)cSim.ucell[1][1]; + PMEDouble ef_vz = AtomZ - (PMEDouble)cSim.ucell[2][2]; +#ifndef use_DPFP + sEEField -= fast_llrintf(ENERGYSCALEF*(PMEFloat)(ef_vx * ef_frcx + ef_vy * ef_frcy + ef_vz * ef_frcz)); +#else + sEEField -= (ef_vx * (PMEDouble)ef_frcx + ef_vy * (PMEDouble)ef_frcy + ef_vz * (PMEDouble)ef_frcz); +#endif +#endif + pos += increment; + +#ifdef PME_ENERGY +#ifndef use_DPFP + atomicAdd(cSim.pEEField, llitoulli(sEEField)); +#else + atomicAdd(cSim.pEEField, llitoulli(llrint(sEEField * ENERGYSCALE))); +#endif +#endif + } +} diff --git src/pmemd/src/cuda/kForcesUpdate.cu src/pmemd/src/cuda/kForcesUpdate.cu index ab0cf7b..b160c23 100644 --- src/pmemd/src/cuda/kForcesUpdate.cu +++ src/pmemd/src/cuda/kForcesUpdate.cu @@ -621,7 +621,7 @@ __launch_bounds__(SM_2X_GENERAL_THREADS_PER_BLOCK, 1) #endif kRefreshCharges_kernel() { - unsigned int pos = 
blockIdx.x * blockDim.x + threadIdx.x; + unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x; unsigned int increment = gridDim.x * blockDim.x; while (pos < cSim.atoms) { diff --git src/pmemd/src/cuda/kU.h src/pmemd/src/cuda/kU.h index 50adae4..7065f41 100644 --- src/pmemd/src/cuda/kU.h +++ src/pmemd/src/cuda/kU.h @@ -122,10 +122,10 @@ double newAtomY = atomY + velY * dtx; double newAtomZ = atomZ + velZ * dtx; - ATOMX(pos) = newAtomX; ATOMY(pos) = newAtomY; ATOMZ(pos) = newAtomZ; + #ifndef UPDATE_NEIGHBORLIST PMEFloat2 xy; xy.x = newAtomX; diff --git src/pmemd/src/get_efield_energy.F90 src/pmemd/src/get_efield_energy.F90 index f107dac..01c0433 100644 --- src/pmemd/src/get_efield_energy.F90 +++ src/pmemd/src/get_efield_energy.F90 @@ -78,9 +78,9 @@ subroutine get_efield_energy(img_frc, crd, img_qterm, img_atm_map, & !Normalize efield only works in a box. Note: add trap if(efn .eq. 1) then - loc_efx=loc_efx/ucell(1,1) - loc_efy=loc_efy/ucell(2,2) - loc_efz=loc_efz/ucell(3,3) + loc_efx=loc_efx*recip(1,1) + loc_efy=loc_efy*recip(2,2) + loc_efz=loc_efz*recip(3,3) end if if (need_pot_enes) then @@ -93,7 +93,7 @@ subroutine get_efield_energy(img_frc, crd, img_qterm, img_atm_map, & crd_i = img_atm_map(img_i) !convert image array to crd array - charge = img_qterm(img_i) / AMBER_ELECTROSTATIC + charge = img_qterm(img_i) * ONE_AMBER_ELECTROSTATIC efrcx = charge*loc_efx efrcy = charge*loc_efy @@ -135,14 +135,6 @@ subroutine get_efield_energy(img_frc, crd, img_qterm, img_atm_map, & end if - ! Save the energies: - - if(efn .eq. 0) then - loc_efx = loc_efx * ucell(1,1) - loc_efy = loc_efy * ucell(2,2) - loc_efz = loc_efz * ucell(3,3) - end if - return end subroutine get_efield_energy diff --git src/pmemd/src/pme_alltasks_setup.F90 src/pmemd/src/pme_alltasks_setup.F90 index 8dea97b..53d32ed 100644 --- src/pmemd/src/pme_alltasks_setup.F90 +++ src/pmemd/src/pme_alltasks_setup.F90 @@ -283,7 +283,8 @@ subroutine pme_alltasks_setup(num_ints, num_reals) end if #ifdef CUDA - call gpu_pme_alltasks_setup(nfft1, nfft2, nfft3, gbl_prefac1, gbl_prefac2, gbl_prefac3, ew_coeff, ips, fswitch) + call gpu_pme_alltasks_setup(nfft1, nfft2, nfft3, gbl_prefac1, gbl_prefac2, gbl_prefac3, ew_coeff, ips, fswitch, efx,& + efy, efz, efn, efphase, effreq) #endif return diff --git src/pmemd/src/pme_force.F90 src/pmemd/src/pme_force.F90 index dec796e..6bf81b6 100644 --- src/pmemd/src/pme_force.F90 +++ src/pmemd/src/pme_force.F90 @@ -336,7 +336,8 @@ subroutine pme_force(atm_cnt, crd, frc, img_atm_map, atm_img_map, & end if call gpu_calculate_gamd_dihedral_weight(totdih) end if - call gpu_pme_ene(ew_coeff, uc_volume, pot_ene, enmr, virial, ekcmt) + call gpu_pme_ene(ew_coeff, uc_volume, pot_ene, enmr, virial, & + ekcmt, nstep, dt) call update_time(nonbond_time) if (need_virials) then vir%molecular(1,1) = virial(1) @@ -351,7 +352,7 @@ subroutine pme_force(atm_cnt, crd, frc, img_atm_map, atm_img_map, & virial(3) = vir%molecular(3,3) end if else - call gpu_pme_force(ew_coeff, uc_volume, virial, ekcmt) + call gpu_pme_force(ew_coeff, uc_volume, virial, ekcmt, nstep, dt) call update_time(nonbond_time) if (need_virials) then @@ -1296,9 +1297,10 @@ subroutine pme_force(atm_cnt, crd, frc, img_atm_map, atm_img_map, & if((igamd.eq.2).or.(igamd.eq.3))then call gpu_calculate_gamd_dihedral_energy_weight() endif - call gpu_pme_ene(ew_coeff, uc_volume, pot_ene, enmr, virial, ekcmt) + call gpu_pme_ene(ew_coeff, uc_volume, pot_ene, enmr, virial, & + ekcmt, nstep, dt) else - call gpu_pme_force(ew_coeff, uc_volume, virial, ekcmt) + call 
gpu_pme_force(ew_coeff, uc_volume, virial, ekcmt, nstep, dt) end if else call ipsupdate(ntb) diff --git test/cuda/4096wat/Run.pure_wat_efield test/cuda/4096wat/Run.pure_wat_efield new file mode 100755 index 0000000..2534f81 --- /dev/null +++ test/cuda/4096wat/Run.pure_wat_efield @@ -0,0 +1,69 @@ +#!/bin/csh -f +#TEST-PROGRAM pmemd.cuda +#TEST-DESCRIP TO_BE_DEtermined +#TEST-PURPOSE regression, basic +#TEST-STATE undocumented + +#$1 = PREC_MODEL +#$2 = NETCDF + +if( ! $?DO_PARALLEL ) then + setenv DO_PARALLEL " " + if( $?TESTsander ) then + set sander = $TESTsander + else + set sander = ../../../bin/pmemd.cuda_$1 + endif +else + if( $?TESTsander ) then + set sander = $TESTsander + else + set sander = ../../../bin/pmemd.cuda_$1.MPI + endif +endif + + +cat > mdin <
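
--------------------------------------------------------------------------------

Usage note: the GPU path is driven by the same &cntrl electric-field variables
the CPU implementation already reads (efx, efy, efz, efn, efphase, effreq).
Judging from the cosine term in kEFE.h, effreq appears to be a frequency in
GHz and efphase a phase offset in degrees, so effreq = 0 gives a static field,
while efn = 1 scales the field components by the reciprocal box lengths. A
minimal mdin sketch (illustrative values only, not the reference input of the
test above):

 &cntrl
   imin = 0, nstlim = 10, dt = 0.002,
   ntb = 1, cut = 8.0,
   efx = 0.0, efy = 0.0, efz = 1.0,
   efn = 1, efphase = 0.0, effreq = 0.0,
 /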