Skip to content

Instantly share code, notes, and snippets.

Forked from janosh/
Created September 17, 2022 13:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ruixingw/fb731ae8438ed1f82a41372e442856e6 to your computer and use it in GitHub Desktop.
Save ruixingw/fb731ae8438ed1f82a41372e442856e6 to your computer and use it in GitHub Desktop.
VASP M1 Mac Compilation Guide

Compile VASP on M1 Mac

Courtesy of Alex Ganose @utf with additions from yours truly @janosh. Dated Mar 28, 2022.

  1. Install Xcode command line tools

    xcode-select --install
  2. Install gcc (which provides OpenMP), OpenMPI and the required libraries (ScaLAPACK, FFTW, QD, OpenBLAS) using Homebrew

    brew install gcc openmpi scalapack fftw qd openblas

    Consider appending hdf5 if you want to compile VASP with HDF5 support.

  3. Compile VASP

    These instructions are for VASP 6.3.0; they should be transferable to other versions of VASP but the variable names may be different

    cd /path/to/vasp-6.x.y
    cp arch/makefile.include.gnu_omp makefile.include

    Then edit makefile.include as follows:

    • Add the following to CPP_OPTIONS:

      -D_OPENMP \
    • Change all instances of "gcc" to "gcc-11" and "g++" to "g++-11" (use the major version of the gcc that Homebrew actually installed on your system)

    • Add the following lines after "LLIBS = -lstdc++". This is necessary to emulate quad precision.

      QD         ?= /opt/homebrew/
      LLIBS      += -L$(QD)/lib -lqdmod -lqd
      INCS       += -I$(QD)/include/qd
    • Set SCALAPACK_ROOT ?= /opt/homebrew

    • Set OPENBLAS_ROOT ?= /opt/homebrew/Cellar/openblas/0.3.20 (Double check this is the path on your system)

    • Set FFTW_ROOT ?= /opt/homebrew

    • (optional but recommended by VASP) For HDF5 support, add

      HDF5_ROOT  ?= /opt/homebrew/
      LLIBS      += -L$(HDF5_ROOT)/lib -lhdf5_fortran
      INCS       += -I$(HDF5_ROOT)/include
  4. Finally, run:

    make all

    If a previous compilation failed, remember to run `make veryclean` to start from a clean slate. This fixes gfortran errors like

    Fatal Error: string.mod not found

Resulting makefile.include with all modifications

See makefile.include below.


Initial performance testing suggests optimal parameters are

export OMP_NUM_THREADS=1 # important
mpiexec -np 8 vasp_std
NCORE = 4 # in INCAR
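|    | n_proc | n_threads | n_core | elapsed (sec) |
|---:|-------:|----------:|-------:|--------------:|
|  0 |      1 |         1 |      2 |          93.3 |
|  1 |      1 |         1 |      4 |          92.8 |
|  2 |      1 |         2 |      2 |          82.8 |
|  3 |      1 |         2 |      4 |          82.7 |
|  4 |      2 |         1 |      2 |          42.8 |
|  5 |      2 |         1 |      4 |          42.9 |
|  6 |      2 |         2 |      2 |          52.9 |
|  7 |      2 |         2 |      4 |          52.7 |
|  8 |      4 |         1 |      2 |          32.9 |
|  9 |      4 |         1 |      4 |          32.9 |
| 10 |      4 |         2 |      2 |          52.9 |
| 11 |      4 |         2 |      4 |          53.0 |
| 12 |      8 |         1 |      2 |          32.8 |
| 13 |      8 |         1 |      4 |          22.8 |
| 14 |      8 |         2 |      2 |          62.8 |
| 15 |      8 |         2 |      4 |          62.9 |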

Brings wall time for this Si2 relaxation down to 23 sec.

# Times a single Si2 relaxation run through atomate2/jobflow with NCORE = 4.
from time import perf_counter

from atomate2.vasp.jobs.core import RelaxMaker
from atomate2.vasp.powerups import update_user_incar_settings
from jobflow import run_locally
from pymatgen.core import Structure

# The timer starts before structure and job setup, so the reported time covers
# the whole script rather than the VASP run alone.
start = perf_counter()

# FCC silicon structure
si_structure = Structure(
    lattice=[[0, 2.73, 2.73], [2.73, 0, 2.73], [2.73, 2.73, 0]],
    species=["Si", "Si"],
    coords=[[0, 0, 0], [0.25, 0.25, 0.25]],
)

# relax job to optimize structure
relax_job = RelaxMaker().make(si_structure)

# override NCORE in the generated INCAR with the value recommended by the guide
relax_job = update_user_incar_settings(relax_job, {"NCORE": 4})

# run job
run_locally(relax_job, create_folders=True, ensure_success=True)

print(f"Si relaxation took {perf_counter() - start:.3f} sec")
# Default precompiler options
# NOTE(review): the `CPP_OPTIONS = ...` assignment that the continuation lines
# below belong to is not visible in this listing, and the last option still
# ends in a continuation backslash that runs into the `CPP =` line. CPP below
# expands $(CPP_OPTIONS), so compare this section against
# arch/makefile.include.gnu_omp before using it. The guide also asks for
# -D_OPENMP to be added to CPP_OPTIONS; it does not appear here.
# The preprocessor/compiler names use the "-11" suffix, i.e. the Homebrew
# gcc-11 / g++-11 binaries rather than plain gcc / g++.
-DMPI -DMPI_BLOCK=8000 -Duse_collective \
-Davoidalloc \
-Dvasp6 \
-Duse_bse_te \
-Dtbdyn \
-Dfock_dblbuf \
CPP = gcc-11 -E -C -w $*$(FUFFIX) >$*$(SUFFIX) $(CPP_OPTIONS)
# Fortran compiler and linker: the MPI wrapper with OpenMP enabled
FC = mpif90 -fopenmp
FCL = mpif90 -fopenmp
FREE = -ffree-form -ffree-line-length-none
# The -L path below is specific to Homebrew's gcc 11.2.0_3 Cellar directory;
# presumably it must be adjusted to the gcc version installed locally - verify.
FFLAGS = -w -ffpe-summary=invalid,zero,overflow -L /opt/homebrew/Cellar/gcc/11.2.0_3/lib/gcc/11
OBJECTS = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o
OBJECTS_O1 += fftw3d.o fftmpi.o fftmpiw.o
OBJECTS_O2 += fft3dlib.o
# For what used to be vasp.5.lib
FC_LIB = $(FC)
CC_LIB = gcc-11
OBJECTS_LIB = linpack_double.o
# For the parser library
CXX_PARS = g++-11
LLIBS = -lstdc++
# QD library from Homebrew; per the guide this is necessary to emulate quad
# precision. ?= keeps any QD value already set in the environment.
QD ?= /opt/homebrew
LLIBS += -L$(QD)/lib -lqdmod -lqd
INCS += -I$(QD)/include/qd
## Customize as of this point! Of course you may change the preceding
## part of this file as well if you like, but it should rarely be
## necessary ...
# All *_ROOT variables below use ?=, so a value exported in the environment
# or passed on the make command line takes precedence over these defaults.
# When compiling on the target machine itself, change this to the
# relevant target when cross-compiling for another architecture
FFLAGS += -march=native
# For gcc-10 and higher (comment out for older versions)
FFLAGS += -fallow-argument-mismatch
# BLAS and LAPACK (mandatory)
# The OpenBLAS path is a version-specific Homebrew Cellar directory;
# double check that it matches the version installed on your system.
OPENBLAS_ROOT ?= /opt/homebrew/Cellar/openblas/0.3.20
BLASPACK = -L$(OPENBLAS_ROOT)/lib -lopenblas
# scaLAPACK (mandatory)
SCALAPACK_ROOT ?= /opt/homebrew
SCALAPACK = -L$(SCALAPACK_ROOT)/lib -lscalapack
# FFTW (mandatory)
FFTW_ROOT ?= /opt/homebrew
LLIBS += -L$(FFTW_ROOT)/lib -lfftw3 -lfftw3_omp
INCS += -I$(FFTW_ROOT)/include
# HDF5-support (optional but strongly recommended)
# Disabled in this listing. To enable it with Homebrew, install hdf5,
# uncomment the three lines below and set HDF5_ROOT to /opt/homebrew.
#HDF5_ROOT ?= /path/to/your/hdf5/installation
#LLIBS += -L$(HDF5_ROOT)/lib -lhdf5_fortran
#INCS += -I$(HDF5_ROOT)/include
# For the VASP-2-Wannier90 interface (optional)
#WANNIER90_ROOT ?= /path/to/your/wannier90/installation
#LLIBS += -L$(WANNIER90_ROOT)/lib -lwannier
# For the fftlib library (experimental)
#FCL += fftlib.o
#CXX_FFTLIB = g++-11 -fopenmp -std=c++11 -DFFTLIB_THREADSAFE
#INCS_FFTLIB = -I./include -I$(FFTW_ROOT)/include
#LIBS += fftlib
#LLIBS += -ldl
"""This script grid-searches OMP_NUM_THREADS, NCORE and number of MPI processes for
minimal VASP runtime on a simple Si2 relaxation. It writes the results to CSV and copies
markdown table to clipboard. Requires Python 3.10. Invoke with

    python <this-script>.py 2>&1 | tee Si-relax.log

to keep a log.
"""

import os
import warnings
from itertools import product
from time import perf_counter, sleep

import pandas as pd
from atomate2.vasp.jobs.core import RelaxMaker
from atomate2.vasp.powerups import update_user_incar_settings
from jobflow import run_locally
from pandas.io.clipboard import clipboard_set
from pymatgen.core import Structure

warnings.filterwarnings("ignore")  # ignore pymatgen warnings clogging up the logs

VASP_BIN = "/Users/janosh/dev/vasp/compiled/vasp_std_6.3.0_m1"

# one (n_proc, n_threads, n_core, elapsed seconds) tuple per completed run
results: list[tuple[int, int, int, float]] = []

# grid-search OMP_NUM_THREADS, NCORE and number of MPI processes
prod = list(product([1, 2, 4, 8], [1, 2], [2, 4]))

try:
    for idx, (n_proc, n_threads, n_core) in enumerate(prod, 1):
        # set before the job is launched so the VASP processes inherit it
        os.environ["OMP_NUM_THREADS"] = str(n_threads)
        print(f"Run {idx} / {len(prod)}")

        # construct an FCC silicon structure
        si_structure = Structure(
            lattice=[[0, 2.73, 2.73], [2.73, 0, 2.73], [2.73, 2.73, 0]],
            species=["Si", "Si"],
            coords=[[0, 0, 0], [0.25, 0.25, 0.25]],
        )

        # make a relax job to optimise the structure
        relax_job = RelaxMaker(
            run_vasp_kwargs={"vasp_cmd": f"mpiexec -np {n_proc} {VASP_BIN}"}
        ).make(si_structure)

        relax_job = update_user_incar_settings(relax_job, {"NCORE": n_core})

        # time only the job execution, not the job construction above
        start = perf_counter()

        # run the job
        run_locally(relax_job, create_folders=True, ensure_success=True)

        elapsed = perf_counter() - start
        print(
            f"Si relaxation with {n_proc=}, {n_threads=}, {n_core=} took {elapsed:.1f} sec"
        )
        results.append((n_proc, n_threads, n_core, elapsed))

        print("Waiting 10 secs to cooldown...\n\n", flush=True)
        sleep(10)  # so every run is a bit more like the first
except KeyboardInterrupt:  # exit gracefully on ctrl+c and write partial results
    print("Interrupted, writing partial results", flush=True)

# Runs after a full sweep as well as after ctrl+c, so completed runs are
# always persisted.
df = pd.DataFrame(results, columns=["n_proc", "n_threads", "n_core", "elapsed"])
df.to_csv("vasp-perf-results.csv", float_format="%.2f")

# copy the results as a markdown table to the clipboard
clipboard_set(df.to_markdown())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment