Skip to content

Instantly share code, notes, and snippets.

@Artem-B
Created June 21, 2022 19:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Artem-B/fc7a40ba69b37e09a8abacd927caa71b to your computer and use it in GitHub Desktop.
Save Artem-B/fc7a40ba69b37e09a8abacd927caa71b to your computer and use it in GitHub Desktop.
// ABI compatibility shims for CUDA-11.7.
// Patch affected libraries with:
// objcopy \
// --redefine-sym cudaCreateTextureObject=cudaCreateTextureObject_v115 \
// --redefine-sym cudaGetTextureObjectTextureDesc=cudaGetTextureObjectTextureDesc_v115 \
// --redefine-sym cublasGetVersion_v2=cublasGetVersion_v2_v115 \
// --redefine-sym cublasLtGetVersion=cublasLtGetVersion_v115 \
// libnvinfer_static.a libcudnn_static.a
//
#include <string.h>
#include <algorithm>
#include "cublas.h"
#include "cublasLt.h"
#include "cuda.h"
#include "cuda_runtime_api.h"
// Tensorrt v7/v8 binaries we have now were built with the older version of
// cuBLAS which encoded its own version differently. Until we update TensorRT we
// must convert the new encoding scheme into something TRT can live with.
// TensorRT static libraries are patched to call this `cublasGetVersion_v115`
// shim instead of the normal cublasGetVersion_v2. The shim converts the version
// from `MMmmpp` encoding into `MMmpp` expected by TensorRT.
namespace {
size_t ConvertVersionEncoding(size_t real_version) {
size_t patch = real_version % 100;
// Old encoding only had one digit available for the minor version, so it
// can't get higher than 9.
size_t minor = std::min<size_t>(9, (real_version / 100) % 100);
size_t major = real_version / 10000;
return major * 1000 + minor * 100 + patch;
}
} // namespace
extern "C" cublasStatus_t cublasGetVersion_v2_v115(cublasHandle_t handle,
int *version) {
#if CUDA_VERSION >= 11070
int real_version;
cublasStatus_t result = cublasGetVersion_v2(handle, &real_version);
*version = ConvertVersionEncoding(real_version);
return result;
#else
// Just pass-through the call to cuBLAS.
return cublasGetVersion_v2(handle, version);
#endif
}
extern "C" size_t cublasLtGetVersion_v115(void) {
#if CUDA_VERSION >= 11070
return ConvertVersionEncoding(cublasLtGetVersion());
#else
return cublasLtGetVersion();
#endif
}
// cudaTextureDesc as it was in CUDA-11.5
struct __device_builtin__ cudaTextureDesc_v115 {
/**
* Texture address mode for up to 3 dimensions
*/
enum cudaTextureAddressMode addressMode[3];
/**
* Texture filter mode
*/
enum cudaTextureFilterMode filterMode;
/**
* Texture read mode
*/
enum cudaTextureReadMode readMode;
/**
* Perform sRGB->linear conversion during texture read
*/
int sRGB;
/**
* Texture Border Color
*/
float borderColor[4];
/**
* Indicates whether texture reads are normalized or not
*/
int normalizedCoords;
/**
* Limit to the anisotropy ratio
*/
unsigned int maxAnisotropy;
/**
* Mipmap filter mode
*/
enum cudaTextureFilterMode mipmapFilterMode;
/**
* Offset applied to the supplied mipmap level
*/
float mipmapLevelBias;
/**
* Lower end of the mipmap level range to clamp access to
*/
float minMipmapLevelClamp;
/**
* Upper end of the mipmap level range to clamp access to
*/
float maxMipmapLevelClamp;
/**
* Disable any trilinear filtering optimizations.
*/
int disableTrilinearOptimization;
};
extern "C" cudaError_t cudaCreateTextureObject_v115(
cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc,
const struct cudaTextureDesc_v115 *pTexDesc,
const struct cudaResourceViewDesc *pResViewDesc) {
#if CUDA_VERSION >= 11070
// Zero-init the full structure and then fill it in with the old-format data,
// leaving new fields with the default value of 0.
cudaTextureDesc texDesc = {};
memcpy((void *)&texDesc, (void *)pTexDesc, sizeof(cudaTextureDesc_v115));
return cudaCreateTextureObject(pTexObject, pResDesc, &texDesc, pResViewDesc);
#else
return cudaCreateTextureObject(pTexObject, pResDesc,
(cudaTextureDesc *)pTexDesc, pResViewDesc);
#endif
}
extern "C" cudaError_t cudaGetTextureObjectTextureDesc_v115(
struct cudaTextureDesc_v115 *pTexDesc, cudaTextureObject_t texObject) {
#if CUDA_VERSION >= 11070
cudaTextureDesc texDesc;
// Provide the function with the correctly-sized buffer, and then copy the
// fields the old version knew about.
cudaError_t result = cudaGetTextureObjectTextureDesc(&texDesc, texObject);
memcpy((void *)pTexDesc, (void *)&texDesc, sizeof(cudaTextureDesc_v115));
return result;
#else
return cudaGetTextureObjectTextureDesc((cudaTextureDesc *)pTexDesc,
texObject);
#endif
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment