Created
July 12, 2020 17:41
-
-
Save kounoike/425965a7cdd0e27b169152092c47d9f2 to your computer and use it in GitHub Desktop.
gpu_nms enabled setup.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from enum import Enum | |
class NMSType(Enum):
    """Identifies which NMS backend ``NMSWrapper`` should load."""

    PY_NMS = 1   # nms.py_cpu_nms.py_cpu_nms
    CPU_NMS = 2  # nms.cpu_nms.cpu_nms
    GPU_NMS = 3  # nms.gpu_nms.gpu_nms


# Backend used when a caller does not choose one explicitly.
default_nms_type = NMSType.PY_NMS
class NMSWrapper:
    """Callable that forwards to the NMS function selected by ``nms_type``.

    The backend module is imported inside ``__init__`` rather than at module
    level, so only the selected implementation has to be importable.
    """

    def __init__(self, nms_type=default_nms_type):
        """Resolve and store the NMS function for ``nms_type``.

        Args:
            nms_type: Member of ``NMSType`` selecting the backend.

        Raises:
            TypeError: If ``nms_type`` is not an ``NMSType`` member.
            ValueError: If ``nms_type`` has no backend wired up here.
            ImportError: If the selected backend module cannot be imported.
        """
        # Explicit raise instead of ``assert``: asserts are stripped under
        # ``python -O`` and would let invalid values through.
        if not isinstance(nms_type, NMSType):
            raise TypeError(
                'nms_type must be an NMSType member, got %r' % (nms_type,))
        self._nms_type = nms_type

        if nms_type == NMSType.PY_NMS:
            from nms.py_cpu_nms import py_cpu_nms
            self._nms = py_cpu_nms
        elif nms_type == NMSType.CPU_NMS:
            from nms.cpu_nms import cpu_nms
            self._nms = cpu_nms
        elif nms_type == NMSType.GPU_NMS:
            from nms.gpu_nms import gpu_nms
            self._nms = gpu_nms
        else:
            raise ValueError('current nms type is not implemented yet')

    def __call__(self, *args, **kwargs):
        """Invoke the selected NMS function with the given arguments.

        For the GPU backend, ``device_id`` defaults to 0 but a value passed
        by the caller is respected.

        Returns:
            Whatever the underlying NMS function returns.
        """
        if self._nms_type == NMSType.GPU_NMS:
            # setdefault keeps a caller-chosen GPU instead of forcing device 0.
            kwargs.setdefault("device_id", 0)
        return self._nms(*args, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import os | |
from os.path import join as pjoin | |
import numpy as np | |
from distutils.core import setup | |
from distutils.extension import Extension | |
from Cython.Distutils import build_ext | |
import sys | |
# Obtain the numpy include directory. This logic works across numpy versions:
# ``get_include`` is tried first and ``get_numpy_include`` is the fallback
# when that attribute does not exist.
try:
    numpy_include = np.get_include()
except AttributeError:
    numpy_include = np.get_numpy_include()
def find_in_path(name, path):
    """Find a file in a search path.

    Adapted from
    http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/

    Args:
        name: File name to look for.
        path: Directories to search, joined by ``os.pathsep``.

    Returns:
        The absolute path of the first existing ``directory/name``, or
        ``None`` when no directory in ``path`` contains ``name``.
    """
    for directory in path.split(os.pathsep):
        candidate = pjoin(directory, name)
        if os.path.exists(candidate):
            return os.path.abspath(candidate)
    return None
def locate_cuda():
    """Locate the CUDA environment on the system.

    Starts by looking for the CUDAHOME env variable. If not found, everything
    is based on finding 'nvcc' in the PATH, with /usr/local/cuda/bin searched
    last.

    Returns:
        A dict with keys 'home', 'nvcc', 'include', and 'lib64' and values
        giving the path to each.

    Raises:
        EnvironmentError: If nvcc cannot be found, or if any of the four
            resulting paths does not exist.
    """
    # first check if the CUDAHOME env variable is in use
    if 'CUDAHOME' in os.environ:
        home = os.environ['CUDAHOME']
        nvcc = pjoin(home, 'bin', 'nvcc')
    else:
        # otherwise, search the PATH for NVCC
        default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
        # PATH may be unset in a stripped-down build environment; in that
        # case only the default CUDA location is searched.
        search_path = os.pathsep.join(
            entry for entry in (os.environ.get('PATH'), default_path) if entry)
        nvcc = find_in_path('nvcc', search_path)
        if nvcc is None:
            raise EnvironmentError('The nvcc binary could not be '
                'located in your $PATH. Either add it to your path, or set $CUDAHOME')
        # nvcc lives in <home>/bin/nvcc, so go two levels up
        home = os.path.dirname(os.path.dirname(nvcc))

    cudaconfig = {'home': home, 'nvcc': nvcc,
                  'include': pjoin(home, 'include'),
                  'lib64': pjoin(home, 'lib64')}
    for k, v in cudaconfig.items():
        if not os.path.exists(v):
            raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))

    return cudaconfig
# Resolve the CUDA toolchain once, when this script is loaded; the build code
# reads CUDA['nvcc'], CUDA['include'] and CUDA['lib64'].
CUDA = locate_cuda()
def customize_compiler_for_nvcc(self):
    """Inject deep into distutils to customize how the dispatch
    to gcc/nvcc works.

    If you subclass UnixCCompiler, it's not trivial to get your subclass
    injected in, and still have the right customizations (i.e.
    distutils.sysconfig.customize_compiler) run on it. So instead of going
    the OO route, I have this. Note, it's kind of like a weird functional
    subclassing going on.

    Args:
        self: The compiler object to patch in place. Its ``src_extensions``
            list gains '.cu' and its ``_compile`` attribute is replaced.
    """
    # tell the compiler it can process .cu
    self.src_extensions.append('.cu')

    # save references to the default compiler_so and _compile methods
    default_compiler_so = self.compiler_so
    default_compile = self._compile

    # now redefine the _compile method. This gets executed for each
    # object but distutils doesn't have the ability to change compilers
    # based on source extension: we add it.
    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        is_cuda = os.path.splitext(src)[1] == '.cu'
        if is_cuda:
            # use nvcc for .cu files
            self.set_executable('compiler_so', CUDA['nvcc'])

        if isinstance(extra_postargs, dict):
            # use only a subset of the extra_postargs, which are 1-1
            # translated from the extra_compile_args in the Extension class
            postargs = extra_postargs['nvcc' if is_cuda else 'gcc']
        else:
            # an Extension that passed a plain list: forward it unchanged
            # instead of failing on the 'gcc'/'nvcc' lookup
            postargs = extra_postargs

        try:
            default_compile(obj, src, ext, cc_args, postargs, pp_opts)
        finally:
            # reset the default compiler_so, which we might have changed for
            # cuda -- also when the compile step raised
            self.compiler_so = default_compiler_so

    # inject our redefined _compile method into the class
    self._compile = _compile
# run the customize_compiler
class custom_build_ext(build_ext):
    """``build_ext`` command that patches the compiler before building."""

    def build_extensions(self):
        """Apply the nvcc customization to ``self.compiler``, then build."""
        customize_compiler_for_nvcc(self.compiler)
        build_ext.build_extensions(self)
# Extensions to build: the Cython CPU NMS and the CUDA-backed GPU NMS.
ext_modules = [
    Extension(
        "nms.cpu_nms",
        ["nms/cpu_nms.pyx"],
        # Keyed by compiler because customize_compiler_for_nvcc looks the
        # arguments up under 'gcc' / 'nvcc'. startswith() also matches the
        # 'linux2' value reported by older interpreters.
        extra_compile_args={
            'gcc': (["-Wno-cpp", "-Wno-unused-function"]
                    if sys.platform.startswith('linux') else []),
        },
        include_dirs=[numpy_include],
    ),
    Extension(
        'nms.gpu_nms',
        ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
        library_dirs=[CUDA['lib64']],
        libraries=['cudart'],
        language='c++',
        runtime_library_dirs=[CUDA['lib64']],
        # this syntax is specific to this build system
        # we're only going to use certain compiler args with nvcc and not
        # with gcc; the implementation of this trick is in
        # customize_compiler_for_nvcc()
        extra_compile_args={
            'gcc': ["-Wno-unused-function"],
            'nvcc': [
                '-arch=sm_35',
                '--ptxas-options=-v',
                '-c',
                '--compiler-options',
                "'-fPIC'",
            ],
        },
        include_dirs=[numpy_include, CUDA['include']],
    ),
]
# Build entry point: registers the extensions and replaces the stock
# build_ext command with the nvcc-aware one.
setup(
    name='tf_faster_rcnn',
    ext_modules=ext_modules,
    # inject our custom trigger
    cmdclass={'build_ext': custom_build_ext},
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment