*******> update.1

Author: Charles Lin

Date: May 5, 2016

Programs: pmemd, pmemd.cuda, pmemd.cuda.MPI

Description: Adds support for GPU External Electric Fields:
             1) Adds support for external electric fields on GPU (serial + MPI)
             2) Cleans up some extraneous code in serial electric fields
             3) Adds test cases for GPU electric fields

--------------------------------------------------------------------------------
 src/pmemd/src/cuda/Makefile                      |   5 +-
 src/pmemd/src/cuda/Makefile.advanced             |   3 +-
 src/pmemd/src/cuda/gpu.cpp                       |  37 +-
 src/pmemd/src/cuda/gpu.h                         |   4 +
 src/pmemd/src/cuda/gputypes.cpp                  |   7 +
 src/pmemd/src/cuda/gputypes.h                    |  15 +-
 src/pmemd/src/cuda/kCalculateEFieldEnergy.cu     |  80 +++++
 src/pmemd/src/cuda/kEFE.h                        |  84 +++++
 src/pmemd/src/cuda/kForcesUpdate.cu              |   2 +-
 src/pmemd/src/cuda/kU.h                          |   2 +-
 src/pmemd/src/get_efield_energy.F90              |  16 +-
 src/pmemd/src/pme_alltasks_setup.F90             |   3 +-
 src/pmemd/src/pme_force.F90                      |  10 +-
 test/cuda/4096wat/Run.pure_wat_efield            |  69 ++++
 test/cuda/4096wat/mdout.pure_wat_efield.GPU_DPFP | 407 ++++++++++++++++++++++
 test/cuda/4096wat/mdout.pure_wat_efield.GPU_SPFP | 413 ++++++++++++++++++++++
 test/cuda/4096wat/mdout.pure_wat_efield.GPU_SPXP | 420 +++++++++++++++++++++++
 test/cuda/Makefile                               |   1 +
 18 files changed, 1547 insertions(+), 31 deletions(-)

diff --git src/pmemd/src/cuda/Makefile src/pmemd/src/cuda/Makefile
index 74a8100..5e89499 100644
--- src/pmemd/src/cuda/Makefile
+++ src/pmemd/src/cuda/Makefile
@@ -5,7 +5,8 @@ include ../../../config.h
 CU_OBJS = cuda_info.o gpu.o gputypes.o \
           kForcesUpdate.o kCalculateLocalForces.o kCalculateGBBornRadii.o \
           kCalculatePMENonbondEnergy.o kCalculateGBNonbondEnergy1.o kNLRadixSort.o \
-          kCalculateGBNonbondEnergy2.o kShake.o kNeighborList.o kPMEInterpolation.o
+          kCalculateGBNonbondEnergy2.o kShake.o kNeighborList.o kPMEInterpolation.o \
+          kCalculateEFieldEnergy.o

 .SUFFIXES: .F90 .cpp .cu .o

@@ -22,6 +23,7 @@ kNeighborList.cu.CU_FLAGS =
 kCalculatePMENonbondEnergy.cu.CU_FLAGS =
 kCalculateAMDWeights.cu.CU_FLAGS =
 kCalculateGAMDWeights.cu.CU_FLAGS =
+kCalculateEFieldEnergy.cu.CU_FLAGS =

 cuda.a: $(CU_OBJS)
 	ar rvs cuda.a $(CU_OBJS)
@@ -50,6 +52,7 @@ kCalculateGBNonbondEnergy2.o: kCalculateGBNonbondEnergy2.h
 kCalculateGBBornRadii.o: kCalculateGBBornRadii.h
 kCalculateLocalForces.o: kCCF.h kCLF.h kCNF.h kCLFdih.h kCLFdihGaMD.h
 kPMEInterpolation.o: kPGGW.h kPGS.h kPSSE.h
+kCalculateEFieldEnergy.o: kEFE.h
 kShake.o: kShake.h

 $(CU_OBJS): gpu.h gputypes.h
diff --git src/pmemd/src/cuda/Makefile.advanced src/pmemd/src/cuda/Makefile.advanced
index aded597..7706edd 100644
--- src/pmemd/src/cuda/Makefile.advanced
+++ src/pmemd/src/cuda/Makefile.advanced
@@ -6,7 +6,7 @@ CU_OBJS = cuda_info.o gpu.o gputypes.o \
           kForcesUpdate.o kCalculateLocalForces.o kCalculateGBBornRadii.o \
           kCalculatePMENonbondEnergy.o radixsort.o radixsort_c.o \
           kCalculateGBNonbondEnergy1.o kCalculateGBNonbondEnergy2.o \
-          kShake.o kNeighborList.o kPMEInterpolation.o \
+          kShake.o kNeighborList.o kPMEInterpolation.o kCalculateEFieldEnergy.o \
           cudpp_scan.o cudpp_scan_c.o

 .SUFFIXES: .fpp .cpp .cu .o

@@ -21,6 +21,7 @@ kShake.cu.CU_FLAGS =
 kPMEInterpolation.cu.CU_FLAGS =
 kNeighborList.cu.CU_FLAGS =
 kCalculatePMENonbondEnergy.cu.CU_FLAGS =
+kCalculateEFieldEnergy.cu.CU_FLAGS =
 cudpp_scan.cu.CU_FLAGS =

 cuda.a: $(CU_OBJS)
diff --git src/pmemd/src/cuda/gpu.cpp src/pmemd/src/cuda/gpu.cpp
index 34828b5..0fdd5a4 100644
--- src/pmemd/src/cuda/gpu.cpp
+++ src/pmemd/src/cuda/gpu.cpp
@@ -3329,7 +3329,8 @@ PRINTMETHOD("gpu_create_outputbuffers");
     gpu->sim.pENMRr6avDistance = gpu->sim.pEnergyBuffer + 
14; gpu->sim.pENMRAngle = gpu->sim.pEnergyBuffer + 15; gpu->sim.pENMRTorsion = gpu->sim.pEnergyBuffer + 16; - gpu->sim.pESurf = gpu->sim.pEnergyBuffer + 17; +// gpu->sim.pESurf = gpu->sim.pEnergyBuffer + 17; //Surf seems to be surface area term for GBSA. Does not seem to be coded in for CUDA. + gpu->sim.pEEField = gpu->sim.pEnergyBuffer + 17; gpu->sim.pVirial = gpu->sim.pEnergyBuffer + VIRIALOFFSET; gpu->sim.pVirial_11 = gpu->sim.pEnergyBuffer + VIRIALOFFSET; gpu->sim.pVirial_22 = gpu->sim.pEnergyBuffer + VIRIALOFFSET + 1; @@ -3427,6 +3428,7 @@ PRINTMETHOD("gpuCopyConstants"); } SetkPMEInterpolationSim(gpu); SetkNeighborListSim(gpu); + SetkCalculateEFieldEnergySim(gpu); SetkCalculatePMENonbondEnergySim(gpu); } } @@ -5121,7 +5123,7 @@ PRINTMETHOD("gpu_pme_ntp_setup"); #endif } -extern "C" void gpu_pme_alltasks_setup_(int* nfft1, int* nfft2, int* nfft3, double* prefac1, double* prefac2, double* prefac3, double* ew_coeff, int* ips, double* fswitch) +extern "C" void gpu_pme_alltasks_setup_(int* nfft1, int* nfft2, int* nfft3, double* prefac1, double* prefac2, double* prefac3, double* ew_coeff, int* ips, double* fswitch, double* efx, double* efy, double* efz, int* efn, double* efphase, double* effreq) { PRINTMETHOD("gpu_pme_alltasks_setup"); @@ -5139,6 +5141,12 @@ PRINTMETHOD("gpu_pme_alltasks_setup"); int n1 = ((*nfft1 + 1) + PADDING) & PADDINGMASK; int n2 = ((*nfft2 + 1) + PADDING) & PADDINGMASK; int n3 = ((*nfft3 + 1) + PADDING) & PADDINGMASK; + gpu->sim.efx = *efx; + gpu->sim.efy = *efy; + gpu->sim.efz = *efz; + gpu->sim.efn = *efn; + gpu->sim.efphase = *efphase; + gpu->sim.effreq = *effreq; gpu->sim.fswitch = *fswitch; gpu->sim.fswitch2 = gpu->sim.fswitch * gpu->sim.fswitch; gpu->sim.fswitch3 = gpu->sim.fswitch * gpu->sim.fswitch2; @@ -5611,7 +5619,7 @@ PRINTMETHOD("gpu_allreduce"); } #endif -extern "C" void gpu_pme_ene_(double* ewaldcof, double* vol, pme_pot_ene_rec* pEnergy, double enmr[3], double virial[3], double ekcmt[3]) +extern "C" void gpu_pme_ene_(double* ewaldcof, double* vol, pme_pot_ene_rec* pEnergy, double enmr[3], double virial[3], double ekcmt[3], int* nstep, double* dt) { PRINTMETHOD("gpu_pme_ene"); // Rebuild neighbor list @@ -5673,6 +5681,12 @@ PRINTMETHOD("gpu_pme_ene"); kCalculatePMENonbondEnergy(gpu); } + // Electric Field Energy + if (gpu->sim.efx != 0 || gpu->sim.efy != 0 || gpu->sim.efz != 0) + { + SetkCalculateEFieldEnergySim(gpu); + kCalculateEFieldEnergy(gpu, *nstep, *dt); + } if (gpu->sim.EPs > 0) kOrientForces(gpu); @@ -5777,8 +5791,8 @@ PRINTMETHOD("gpu_pme_ene"); energy[i] = (PMEDouble)val / ENERGYSCALE; } pEnergy->total += energy[i]; - //printf("pre-virial %6d %16.7f\n", i, energy[i]); - //printf("%06d %6d %16.7f\n", gpu->gpuID, i, energy[i]); +// printf("pre-virial %6d %16.7f\n", i, energy[i]); +// printf("%06d %6d %16.7f\n", gpu->gpuID, i, energy[i]); } for (int i = VIRIALOFFSET; i < ENERGYTERMS; i++) { @@ -5835,6 +5849,7 @@ PRINTMETHOD("gpu_pme_ene"); enmr[0] = energy[14]; enmr[1] = energy[15]; enmr[2] = energy[16]; + pEnergy->efield = energy[17]; // Grab virial if needed if ((gpu->sim.ntp > 0) && (gpu->sim.barostat == 1)) { @@ -5867,7 +5882,7 @@ PRINTMETHOD("gpu_pme_ene"); #endif } -extern "C" void gpu_pme_force_(double* ewaldcof, double* vol, double virial[3], double ekcmt[3]) +extern "C" void gpu_pme_force_(double* ewaldcof, double* vol, double virial[3], double ekcmt[3], int nstep, double dt) { PRINTMETHOD("gpu_pme_force"); // Rebuild neighbor list @@ -5930,6 +5945,13 @@ PRINTMETHOD("gpu_pme_force"); kCalculatePMENonbondForces(gpu); } + // Electric 
Field Forces + if (gpu->sim.efx != 0 || gpu->sim.efy != 0 || gpu->sim.efz != 0) + { + SetkCalculateEFieldEnergySim(gpu); + kCalculateEFieldForces(gpu, nstep, dt); + } + if (gpu->sim.EPs > 0) kOrientForces(gpu); } @@ -6072,7 +6094,7 @@ PRINTMETHOD("gpu_ips_ene"); pEnergy->elec_tot = energy[10]; else #endif - pEnergy->elec_tot = energy[10] + gpu->sim.EIPSEL + gpu->sim.eipssel; + pEnergy->elec_tot = energy[10] + gpu->sim.EIPSEL + gpu->sim.eipssel; pEnergy->elec_dir = pEnergy->elec_tot; pEnergy->elec_recip = 0.0; pEnergy->elec_nb_adjust = 0.0; @@ -6089,6 +6111,7 @@ PRINTMETHOD("gpu_ips_ene"); enmr[0] = energy[14]; enmr[1] = energy[15]; enmr[2] = energy[16]; + pEnergy->efield = energy[17]; // Grab virial if needed if ((gpu->sim.ntp > 0) && (gpu->sim.barostat == 1)) diff --git src/pmemd/src/cuda/gpu.h src/pmemd/src/cuda/gpu.h index 1a794b9..ce7d06a 100644 --- src/pmemd/src/cuda/gpu.h +++ src/pmemd/src/cuda/gpu.h @@ -96,6 +96,8 @@ extern "C" void kCalculateLocalForcesInitKernels(gpuContext gpu); extern "C" void kShakeInitKernels(gpuContext gpu); extern "C" void SetkForcesUpdateSim(gpuContext gpu); extern "C" void GetkForcesUpdateSim(gpuContext gpu); +extern "C" void SetkCalculateEFieldEnergySim(gpuContext gpu); +extern "C" void GetkCalculateEFieldEnergySim(gpuContext gpu); extern "C" void SetkCalculateLocalForcesSim(gpuContext gpu); extern "C" void GetkCalculateLocalForcesSim(gpuContext gpu); extern "C" void SetkCalculateGBBornRadiiSim(gpuContext gpu); @@ -147,6 +149,8 @@ extern "C" void kRelaxMDUpdate(gpuContext gpu, PMEDouble dt, PMEDouble temp0, PM extern "C" void kShake(gpuContext gpu); extern "C" void kFastShake(gpuContext gpu); extern "C" void kCalculateKineticEnergy(gpuContext gpu, PMEFloat c_ave); +extern "C" void kCalculateEFieldForces(gpuContext gpu, int nstep, double dt); +extern "C" void kCalculateEFieldEnergy(gpuContext gpu, int nstep, double dt); extern "C" void kCalculateCOM(gpuContext gpu); extern "C" void kCalculateSoluteCOM(gpuContext gpu); extern "C" void kReduceSoluteCOM(gpuContext gpu); diff --git src/pmemd/src/cuda/gputypes.cpp src/pmemd/src/cuda/gputypes.cpp index 0467f3a..291d78b 100644 --- src/pmemd/src/cuda/gputypes.cpp +++ src/pmemd/src/cuda/gputypes.cpp @@ -30,6 +30,13 @@ void clearCudaSimulation(cudaSimulation& sim) sim.scee = (1.0 / 1.2); sim.cut = 8.0; sim.cut2 = sim.cut * sim.cut; + sim.fswitch = -1; + sim.efx = 0; + sim.efy = 0; + sim.efz = 0; + sim.efn = 0; + sim.efphase = 0; + sim.effreq = 0; sim.skinnb = 2.0f; sim.dielc = 1.0; sim.tol = 0.0001; diff --git src/pmemd/src/cuda/gputypes.h src/pmemd/src/cuda/gputypes.h index 9ba868b..d7a76e2 100644 --- src/pmemd/src/cuda/gputypes.h +++ src/pmemd/src/cuda/gputypes.h @@ -167,10 +167,10 @@ enum { NLEXCLUSIONSHIFT = 8, NLEXCLUSIONATOMMASK = ((1 << NLEXCLUSIONSHIFT) - 1), - VIRIALOFFSET = 18, + VIRIALOFFSET = 19, AMDEDIHEDRALOFFSET = 25, GAMDEDIHEDRALOFFSET = 26, - ENERGYTERMS = 27, + ENERGYTERMS = 28, TI_ENERGYTERMS = ENERGYTERMS * 3, PADDING = 16, PADDINGMASK = 0xfffffff0, @@ -442,6 +442,8 @@ struct pme_pot_ene_rec double cmap; double amd_boost; double gamd_boost; + double emap; + double efield; }; struct NTPData @@ -697,6 +699,12 @@ struct cudaSimulation { PMEFloat cut3invcut3minfswitch3; // VDW force switch constant cut3 / (cut3-fswitch3) PMEFloat cutPlusSkin; // Nonbond interaction cutooff plus skin PMEFloat cutPlusSkin2; // Nonbond interaction cutooff plus skin squared + int efn; // Normalize electric field vectors + PMEFloat efx; // Electric field x vector + PMEFloat efy; // Electric field y vector + PMEFloat efz; 
// Electric field z vector
+    PMEFloat efphase;                            // Electric field spatial phase
+    PMEFloat effreq;                             // Electric field time frequency
     double dielc;                                // Dielectric constant
     double gamma_ln;                             // Langevin integration parameter
     double c_ave;                                // Langevin integration parameter
@@ -1151,7 +1159,8 @@ struct cudaSimulation {
     unsigned long long int* pENMRr6avDistance;   // Pointer to NMR r6av distance energy
     unsigned long long int* pENMRAngle;          // Pointer to NMR angle energy
     unsigned long long int* pENMRTorsion;        // Pointer to NMR torsion energy
-    unsigned long long int* pESurf;              // Pointer to GBSA surface energy
+//  unsigned long long int* pESurf;              // Pointer to GBSA surface energy
+    unsigned long long int* pEEField;            // Pointer to Electric Field energy
     unsigned long long int* pVirial;             // Pointer to PME virial
     unsigned long long int* pVirial_11;          // Pointer to PME virial component
     unsigned long long int* pVirial_22;          // Pointer to PME virial component
diff --git src/pmemd/src/cuda/kCalculateEFieldEnergy.cu src/pmemd/src/cuda/kCalculateEFieldEnergy.cu
new file mode 100644
index 0000000..cd5224d
--- /dev/null
+++ src/pmemd/src/cuda/kCalculateEFieldEnergy.cu
@@ -0,0 +1,80 @@
+#include "copyright.i"
+
+/***************************************************/
+/*                                                 */
+/*      AMBER NVIDIA CUDA GPU IMPLEMENTATION       */
+/*                 PMEMD VERSION                   */
+/*                    Feb 2014                     */
+/*                       by                        */
+/*                 Scott Le Grand                  */
+/*                      and                        */
+/*                 Ross C. Walker                  */
+/*                                                 */
+/***************************************************/
+
+#include <cuda.h>
+#include "gpu.h"
+#include "ptxmacros.h"
+//#include "cuda_profiler_api.h"
+
+//#define PME_ENERGY
+
+static __constant__ cudaSimulation cSim;
+
+void SetkCalculateEFieldEnergySim(gpuContext gpu)
+{
+    cudaError_t status;
+    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaSimulation));
+    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
+}
+
+void GetkCalculateEFieldEnergySim(gpuContext gpu)
+{
+    cudaError_t status;
+    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaSimulation));
+    RTERROR(status, "cudaMemcpyFromSymbol: GetSim copy from cSim failed");
+}
+
+
+// EField kernels
+
+#define PME_ENERGY
+
+__global__ void
+#if (__CUDA_ARCH__ >= 300)
+__launch_bounds__(SM_3X_UPDATE_THREADS_PER_BLOCK, 1)
+#else
+__launch_bounds__(SM_2X_UPDATE_THREADS_PER_BLOCK, 1)
+#endif
+kCalculateEFieldEnergy_kernel(PMEDouble nstep, PMEDouble dt)
+#include "kEFE.h"
+
+#undef PME_ENERGY
+
+__global__ void
+#if (__CUDA_ARCH__ >= 300)
+__launch_bounds__(SM_3X_UPDATE_THREADS_PER_BLOCK, 1)
+#else
+__launch_bounds__(SM_2X_UPDATE_THREADS_PER_BLOCK, 1)
+#endif
+kCalculateEFieldForces_kernel(PMEDouble nstep, PMEDouble dt)
+#include "kEFE.h"
+
+extern "C" void kCalculateEFieldForces(gpuContext gpu, int nstep, PMEDouble dt)
+{
+    kCalculateEFieldForces_kernel<<<gpu->updateBlocks, gpu->updateThreadsPerBlock>>>((PMEDouble)nstep, dt);
+    LAUNCHERROR("kCalculateEFieldForces");
+}
+
+
+extern "C" void kCalculateEFieldEnergy(gpuContext gpu, int nstep, PMEDouble dt)
+{
+    kCalculateEFieldEnergy_kernel<<<gpu->updateBlocks, gpu->updateThreadsPerBlock>>>((PMEDouble)nstep, dt);
+    LAUNCHERROR("kCalculateEFieldEnergy");
+}
+
+extern "C" void kCalculateEFieldEnergyInitKernels(gpuContext gpu)
+{
+    cudaFuncSetSharedMemConfig(kCalculateEFieldEnergy_kernel, cudaSharedMemBankSizeEightByte);
+    cudaFuncSetSharedMemConfig(kCalculateEFieldForces_kernel, cudaSharedMemBankSizeEightByte);
+}
diff --git src/pmemd/src/cuda/kEFE.h src/pmemd/src/cuda/kEFE.h
new file mode 100644
index 0000000..2c4a6ca
--- /dev/null
+++ src/pmemd/src/cuda/kEFE.h
@@ -0,0 +1,84 @@
+#include "copyright.i"
+
+/***************************************************/ +/* */ +/* AMBER NVIDIA CUDA GPU IMPLEMENTATION */ +/* PMEMD VERSION */ +/* Feb 2014 */ +/* by */ +/* Scott Le Grand */ +/* and */ +/* Ross C. Walker */ +/* */ +/***************************************************/ + +{ +// #defines: PME_ENERGY, NEIGHBOR_LIST + + // Precompute Electric Field Constants + PMEFloat phase = cos((2*PI*cSim.effreq/1000)*((PMEFloat)dt*(PMEFloat)nstep)-PI/180*cSim.efphase); + PMEFloat loc_efx = phase * (PMEFloat)cSim.efx; + PMEFloat loc_efy = phase * (PMEFloat)cSim.efy; + PMEFloat loc_efz = phase * (PMEFloat)cSim.efz; +#ifdef PME_ENERGY + PMEForce sEEField = (PMEForce)0; +#endif + + unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int increment = gridDim.x * blockDim.x; + unsigned int imgPos = cSim.pImageAtomLookup[pos]; + + if (cSim.efn == 1) + { + loc_efx *= (PMEFloat)cSim.recip[0][0]; + loc_efy *= (PMEFloat)cSim.recip[1][1]; + loc_efz *= (PMEFloat)cSim.recip[2][2]; + } + + if(pos < cSim.atoms) + { + //Convert internal charge to electron charge + PMEDouble electron_charge = (PMEDouble)cSim.pImageCharge[imgPos] / (PMEDouble)18.2223; + + PMEDouble ef_frcx = electron_charge * loc_efx; + PMEDouble ef_frcy = electron_charge * loc_efy; + PMEDouble ef_frcz = electron_charge * loc_efz; + +#ifdef use_SPFP + atomicAdd((unsigned long long int*)&cSim.pNBForceXAccumulator[imgPos], llitoulli(ef_frcx * FORCESCALEF)); + atomicAdd((unsigned long long int*)&cSim.pNBForceYAccumulator[imgPos], llitoulli(ef_frcy * FORCESCALEF)); + atomicAdd((unsigned long long int*)&cSim.pNBForceZAccumulator[imgPos], llitoulli(ef_frcz * FORCESCALEF)); +#elif defined(use_SPXP) + atomicAdd((unsigned long long int*)&cSim.pNBForceXAccumulator[imgPos], llitoulli(fast_llrintf(ef_frcx * FORCESCALEF))); + atomicAdd((unsigned long long int*)&cSim.pNBForceYAccumulator[imgPos], llitoulli(fast_llrintf(ef_frcy * FORCESCALEF))); + atomicAdd((unsigned long long int*)&cSim.pNBForceZAccumulator[imgPos], llitoulli(fast_llrintf(ef_frcz * FORCESCALEF))); +#else + atomicAdd((unsigned long long int*)&cSim.pNBForceXAccumulator[imgPos], llitoulli(llrint((PMEForce)ef_frcx * FORCESCALE))); + atomicAdd((unsigned long long int*)&cSim.pNBForceYAccumulator[imgPos], llitoulli(llrint((PMEForce)ef_frcy * FORCESCALE))); + atomicAdd((unsigned long long int*)&cSim.pNBForceZAccumulator[imgPos], llitoulli(llrint((PMEForce)ef_frcz * FORCESCALE))); +#endif + +#ifdef PME_ENERGY + PMEDouble AtomX = (PMEDouble)cSim.pImageX[imgPos]; + PMEDouble AtomY = (PMEDouble)cSim.pImageY[imgPos]; + PMEDouble AtomZ = (PMEDouble)cSim.pImageZ[imgPos]; + PMEDouble ef_vx = AtomX - (PMEDouble)cSim.ucell[0][0]; + PMEDouble ef_vy = AtomY - (PMEDouble)cSim.ucell[1][1]; + PMEDouble ef_vz = AtomZ - (PMEDouble)cSim.ucell[2][2]; +#ifndef use_DPFP + sEEField -= fast_llrintf(ENERGYSCALEF*(PMEFloat)(ef_vx * ef_frcx + ef_vy * ef_frcy + ef_vz * ef_frcz)); +#else + sEEField -= (ef_vx * (PMEDouble)ef_frcx + ef_vy * (PMEDouble)ef_frcy + ef_vz * (PMEDouble)ef_frcz); +#endif +#endif + pos += increment; + +#ifdef PME_ENERGY +#ifndef use_DPFP + atomicAdd(cSim.pEEField, llitoulli(sEEField)); +#else + atomicAdd(cSim.pEEField, llitoulli(llrint(sEEField * ENERGYSCALE))); +#endif +#endif + } +} diff --git src/pmemd/src/cuda/kForcesUpdate.cu src/pmemd/src/cuda/kForcesUpdate.cu index ab0cf7b..b160c23 100644 --- src/pmemd/src/cuda/kForcesUpdate.cu +++ src/pmemd/src/cuda/kForcesUpdate.cu @@ -621,7 +621,7 @@ __launch_bounds__(SM_2X_GENERAL_THREADS_PER_BLOCK, 1) #endif kRefreshCharges_kernel() { - unsigned int pos = 
blockIdx.x * blockDim.x + threadIdx.x; + unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x; unsigned int increment = gridDim.x * blockDim.x; while (pos < cSim.atoms) { diff --git src/pmemd/src/cuda/kU.h src/pmemd/src/cuda/kU.h index 50adae4..7065f41 100644 --- src/pmemd/src/cuda/kU.h +++ src/pmemd/src/cuda/kU.h @@ -122,10 +122,10 @@ double newAtomY = atomY + velY * dtx; double newAtomZ = atomZ + velZ * dtx; - ATOMX(pos) = newAtomX; ATOMY(pos) = newAtomY; ATOMZ(pos) = newAtomZ; + #ifndef UPDATE_NEIGHBORLIST PMEFloat2 xy; xy.x = newAtomX; diff --git src/pmemd/src/get_efield_energy.F90 src/pmemd/src/get_efield_energy.F90 index f107dac..01c0433 100644 --- src/pmemd/src/get_efield_energy.F90 +++ src/pmemd/src/get_efield_energy.F90 @@ -78,9 +78,9 @@ subroutine get_efield_energy(img_frc, crd, img_qterm, img_atm_map, & !Normalize efield only works in a box. Note: add trap if(efn .eq. 1) then - loc_efx=loc_efx/ucell(1,1) - loc_efy=loc_efy/ucell(2,2) - loc_efz=loc_efz/ucell(3,3) + loc_efx=loc_efx*recip(1,1) + loc_efy=loc_efy*recip(2,2) + loc_efz=loc_efz*recip(3,3) end if if (need_pot_enes) then @@ -93,7 +93,7 @@ subroutine get_efield_energy(img_frc, crd, img_qterm, img_atm_map, & crd_i = img_atm_map(img_i) !convert image array to crd array - charge = img_qterm(img_i) / AMBER_ELECTROSTATIC + charge = img_qterm(img_i) * ONE_AMBER_ELECTROSTATIC efrcx = charge*loc_efx efrcy = charge*loc_efy @@ -135,14 +135,6 @@ subroutine get_efield_energy(img_frc, crd, img_qterm, img_atm_map, & end if - ! Save the energies: - - if(efn .eq. 0) then - loc_efx = loc_efx * ucell(1,1) - loc_efy = loc_efy * ucell(2,2) - loc_efz = loc_efz * ucell(3,3) - end if - return end subroutine get_efield_energy diff --git src/pmemd/src/pme_alltasks_setup.F90 src/pmemd/src/pme_alltasks_setup.F90 index 8dea97b..53d32ed 100644 --- src/pmemd/src/pme_alltasks_setup.F90 +++ src/pmemd/src/pme_alltasks_setup.F90 @@ -283,7 +283,8 @@ subroutine pme_alltasks_setup(num_ints, num_reals) end if #ifdef CUDA - call gpu_pme_alltasks_setup(nfft1, nfft2, nfft3, gbl_prefac1, gbl_prefac2, gbl_prefac3, ew_coeff, ips, fswitch) + call gpu_pme_alltasks_setup(nfft1, nfft2, nfft3, gbl_prefac1, gbl_prefac2, gbl_prefac3, ew_coeff, ips, fswitch, efx,& + efy, efz, efn, efphase, effreq) #endif return diff --git src/pmemd/src/pme_force.F90 src/pmemd/src/pme_force.F90 index dec796e..6bf81b6 100644 --- src/pmemd/src/pme_force.F90 +++ src/pmemd/src/pme_force.F90 @@ -336,7 +336,8 @@ subroutine pme_force(atm_cnt, crd, frc, img_atm_map, atm_img_map, & end if call gpu_calculate_gamd_dihedral_weight(totdih) end if - call gpu_pme_ene(ew_coeff, uc_volume, pot_ene, enmr, virial, ekcmt) + call gpu_pme_ene(ew_coeff, uc_volume, pot_ene, enmr, virial, & + ekcmt, nstep, dt) call update_time(nonbond_time) if (need_virials) then vir%molecular(1,1) = virial(1) @@ -351,7 +352,7 @@ subroutine pme_force(atm_cnt, crd, frc, img_atm_map, atm_img_map, & virial(3) = vir%molecular(3,3) end if else - call gpu_pme_force(ew_coeff, uc_volume, virial, ekcmt) + call gpu_pme_force(ew_coeff, uc_volume, virial, ekcmt, nstep, dt) call update_time(nonbond_time) if (need_virials) then @@ -1296,9 +1297,10 @@ subroutine pme_force(atm_cnt, crd, frc, img_atm_map, atm_img_map, & if((igamd.eq.2).or.(igamd.eq.3))then call gpu_calculate_gamd_dihedral_energy_weight() endif - call gpu_pme_ene(ew_coeff, uc_volume, pot_ene, enmr, virial, ekcmt) + call gpu_pme_ene(ew_coeff, uc_volume, pot_ene, enmr, virial, & + ekcmt, nstep, dt) else - call gpu_pme_force(ew_coeff, uc_volume, virial, ekcmt) + call 
gpu_pme_force(ew_coeff, uc_volume, virial, ekcmt, nstep, dt) end if else call ipsupdate(ntb) diff --git test/cuda/4096wat/Run.pure_wat_efield test/cuda/4096wat/Run.pure_wat_efield new file mode 100755 index 0000000..2534f81 --- /dev/null +++ test/cuda/4096wat/Run.pure_wat_efield @@ -0,0 +1,69 @@ +#!/bin/csh -f +#TEST-PROGRAM pmemd.cuda +#TEST-DESCRIP TO_BE_DEtermined +#TEST-PURPOSE regression, basic +#TEST-STATE undocumented + +#$1 = PREC_MODEL +#$2 = NETCDF + +if( ! $?DO_PARALLEL ) then + setenv DO_PARALLEL " " + if( $?TESTsander ) then + set sander = $TESTsander + else + set sander = ../../../bin/pmemd.cuda_$1 + endif +else + if( $?TESTsander ) then + set sander = $TESTsander + else + set sander = ../../../bin/pmemd.cuda_$1.MPI + endif +endif + + +cat > mdin <
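
--------------------------------------------------------------------------------

Usage note: the GPU path is driven by the same &cntrl electric-field variables
the CPU implementation already reads (efx, efy, efz, efn, efphase, effreq).
Judging from the cosine term in kEFE.h, effreq appears to be a frequency in
GHz and efphase a phase offset in degrees, so effreq = 0 gives a static field,
while efn = 1 scales the field components by the reciprocal box lengths. A
minimal mdin sketch (illustrative values only, not the reference input of the
test above):

 &cntrl
   imin = 0, nstlim = 10, dt = 0.002,
   ntb = 1, cut = 8.0,
   efx = 0.0, efy = 0.0, efz = 1.0,
   efn = 1, efphase = 0.0, effreq = 0.0,
 /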