Dear LAMMPS users,
I have installed the 23Jun2022 version of LAMMPS on two different machines. The first one has an Intel CPU, and the second one has an AMD CPU.
When I run my simulation with the same number of threads, the Intel machine is four times faster. I suspect this is due to the SIMD instructions used by the INTEL package.
I was wondering if there is any way to reach the same speedup on the machine with the AMD CPU?
Any help is highly appreciated in advance.
Mahdi
These are the specifications of the CPUs and how I installed LAMMPS on each machine:
Machine-1
CPU: Intel(R) Xeon(R) Gold 6226R CPU @ 2.90GHz
LAMMPS version: 23 Jun 2022
Installation:
install.sh {
#!/bin/sh
# Configure and build LAMMPS in a fresh build-intel-most5 directory, using the
# intel.cmake and most4.cmake presets plus a few explicit options.

# Remove any previous build tree so the configuration starts from scratch.
rm -rf build-intel-most5
mkdir build-intel-most5 || exit 1
# Stop if the directory cannot be entered; otherwise cmake would configure
# and build in whatever directory the script was started from.
cd build-intel-most5 || exit 1

# Two preset files are pre-loaded with -C; individual options are passed with -D.
cmake \
    -C ../cmake/presets/intel.cmake \
    -C ../cmake/presets/most4.cmake \
    -D BUILD_MPI=yes \
    -D FFT=MKL \
    -D FFT_SINGLE=yes \
    -D INTEL_LRT_MODE=c++11 \
    ../cmake || exit 1
cmake --build . --parallel
}
most4.cmake {
# preset that turns on a wide range of packages, some of which require
# external libraries. Compared to all_on.cmake some more unusual packages
# are removed. The resulting binary should be able to run most inputs.

# Packages switched on by this preset; each entry NAME is turned into the
# cache option PKG_NAME by the loop below.
set(ALL_PACKAGES
  CORESHELL
  INTEL
  GPU
  ASPHERE
  BODY
  BROWNIAN
  EXTRA-PAIR
  DIELECTRIC
  DIPOLE
  DRUDE
  FEP
  GRANULAR
  INTERLAYER
  KSPACE
  MANYBODY
  MISC
  MOLECULE
  QEQ
  REACTION
  REAXFF
  REPLICA
  RIGID
  OPENMP
  EXTRA-FIX
  EXTRA-DUMP)

# FORCE makes the preset overwrite any PKG_* value that is already cached.
foreach(PKG ${ALL_PACKAGES})
  set(PKG_${PKG} ON CACHE BOOL "" FORCE)
endforeach()

# Enable the BUILD_TOOLS option as well.
set(BUILD_TOOLS ON CACHE BOOL "" FORCE)
}
intel.cmake {
# preset that will enable Intel compilers with support for MPI and OpenMP (on Linux boxes)

# Compiler executables for C++, C and Fortran.
set(CMAKE_CXX_COMPILER "icpc" CACHE STRING "" FORCE)
set(CMAKE_C_COMPILER "icc" CACHE STRING "" FORCE)
set(CMAKE_Fortran_COMPILER "ifort" CACHE STRING "" FORCE)

# Per-configuration flags. Each value must stay ONE quoted string so that the
# space-separated flags are stored as a single cache value, not as a list.
set(CMAKE_CXX_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)

# MPI settings: compiler name and MPI compiler wrapper.
set(MPI_CXX "icpc" CACHE STRING "" FORCE)
set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)

# Drop any cached HAVE_OMP_H_INCLUDE entry before the OpenMP settings are
# replaced below.
unset(HAVE_OMP_H_INCLUDE CACHE)

# OpenMP settings: -qopenmp as the compile flag, "omp" as the library name,
# and libiomp5.so as the library file.
set(OpenMP_C "icc" CACHE STRING "" FORCE)
set(OpenMP_C_FLAGS "-qopenmp" CACHE STRING "" FORCE)
set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_CXX "icpc" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE)
set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE)
set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE)
}
Machine-2
CPU: AMD EPYC 7702
LAMMPS version 22Jun2023
install.sh {
#!/bin/sh
# Configure and build LAMMPS in a fresh build-most9 directory, using the
# kokkos-amd.cmake and most4.cmake presets plus a few explicit options.

module load oneapi
module load intelmpi/2021.6
module load mkl/2019.6

# Remove any previous build tree so the configuration starts from scratch.
rm -rf build-most9
mkdir build-most9 || exit 1
# Stop if the directory cannot be entered; otherwise cmake would configure
# and build in whatever directory the script was started from.
cd build-most9 || exit 1

# NOTE(review): neither preset loaded here sets CMAKE_CXX_COMPILER, so the
# compiler is whatever CMake detects on its own -- confirm this is the
# compiler you intend to use.
# NOTE(review): the mkl module is loaded above but no -D FFT=MKL is passed,
# so presumably MKL is not selected as the FFT library -- confirm.
cmake \
    -C ../cmake/presets/kokkos-amd.cmake \
    -C ../cmake/presets/most4.cmake \
    -D BUILD_MPI=yes \
    -D FFT_SINGLE=yes \
    ../cmake || exit 1
cmake --build . --parallel
}
kokkos-amd.cmake {
# Preset that switches on the KOKKOS package, the Kokkos ZEN2 architecture
# option and BUILD_OMP. FORCE overwrites values that are already cached.
set(PKG_KOKKOS ON CACHE BOOL "" FORCE)
set(Kokkos_ARCH_ZEN2 ON CACHE BOOL "" FORCE)
set(BUILD_OMP ON CACHE BOOL "" FORCE)

# hide deprecation warnings temporarily for stable release
set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE)

# Enable OpenMP execution space
set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "" FORCE)
}
most4.cmake {
# preset that turns on a wide range of packages, some of which require
# external libraries. Compared to all_on.cmake some more unusual packages
# are removed. The resulting binary should be able to run most inputs.

# Packages switched on by this preset; each entry NAME is turned into the
# cache option PKG_NAME by the loop below.
set(ALL_PACKAGES
  CORESHELL
  INTEL
  ASPHERE
  BODY
  BROWNIAN
  EXTRA-PAIR
  DIELECTRIC
  DIPOLE
  DRUDE
  FEP
  GRANULAR
  INTERLAYER
  KSPACE
  MANYBODY
  MISC
  MOLECULE
  QEQ
  REACTION
  REAXFF
  REPLICA
  RIGID
  OPENMP
  EXTRA-FIX
  EXTRA-DUMP)

# FORCE makes the preset overwrite any PKG_* value that is already cached.
foreach(PKG ${ALL_PACKAGES})
  set(PKG_${PKG} ON CACHE BOOL "" FORCE)
endforeach()

# Enable the BUILD_TOOLS option as well.
set(BUILD_TOOLS ON CACHE BOOL "" FORCE)
}