Artem-B/abi_break_workaround.cc

## abi_break_workaround.cc
// ABI compatibility shims for  CUDA-11.7.
// Patch affected libraries with:
// objcopy \
// --redefine-sym cudaCreateTextureObject=cudaCreateTextureObject_v115 \
// --redefine-sym cudaGetTextureObjectTextureDesc=cudaGetTextureObjectTextureDesc_v115 \
// --redefine-sym cublasGetVersion_v2=cublasGetVersion_v2_v115 \
// --redefine-sym cublasLtGetVersion=cublasLtGetVersion_v115 \
// libnvinfer_static.a libcudnn_static.a
//

#include <string.h>

#include <algorithm>

#include "cublas.h"
#include "cublasLt.h"
#include "cuda.h"
#include "cuda_runtime_api.h"

// Tensorrt v7/v8 binaries we have now were built with the older version of
// cuBLAS which encoded its own version differently. Until we update TensorRT we
// must convert the new encoding scheme into something TRT can live with.

// TensorRT static libraries are patched to call this `cublasGetVersion_v115`
// shim instead of the normal cublasGetVersion_v2. The shim converts the version
// from `MMmmpp` encoding into `MMmpp` expected by TensorRT.

namespace {
size_t ConvertVersionEncoding(size_t real_version) {
  size_t patch = real_version % 100;
  // Old encoding only had one digit available for the minor version, so it
  // can't get higher than 9.
  size_t minor = std::min<size_t>(9, (real_version / 100) % 100);
  size_t major = real_version / 10000;
  return major * 1000 + minor * 100 + patch;
}
}  // namespace

extern "C" cublasStatus_t cublasGetVersion_v2_v115(cublasHandle_t handle,
                                                   int *version) {
#if CUDA_VERSION >= 11070
  int real_version;
  cublasStatus_t result = cublasGetVersion_v2(handle, &real_version);
  *version = ConvertVersionEncoding(real_version);
  return result;
#else
  // Just pass-through the call to cuBLAS.
  return cublasGetVersion_v2(handle, version);
#endif
}

extern "C" size_t cublasLtGetVersion_v115(void) {
#if CUDA_VERSION >= 11070
  return ConvertVersionEncoding(cublasLtGetVersion());
#else
  return cublasLtGetVersion();
#endif
}

// cudaTextureDesc as it was in CUDA-11.5
struct __device_builtin__ cudaTextureDesc_v115 {
  /**
   * Texture address mode for up to 3 dimensions
   */
  enum cudaTextureAddressMode addressMode[3];
  /**
   * Texture filter mode
   */
  enum cudaTextureFilterMode filterMode;
  /**
   * Texture read mode
   */
  enum cudaTextureReadMode readMode;
  /**
   * Perform sRGB->linear conversion during texture read
   */
  int sRGB;
  /**
   * Texture Border Color
   */
  float borderColor[4];
  /**
   * Indicates whether texture reads are normalized or not
   */
  int normalizedCoords;
  /**
   * Limit to the anisotropy ratio
   */
  unsigned int maxAnisotropy;
  /**
   * Mipmap filter mode
   */
  enum cudaTextureFilterMode mipmapFilterMode;
  /**
   * Offset applied to the supplied mipmap level
   */
  float mipmapLevelBias;
  /**
   * Lower end of the mipmap level range to clamp access to
   */
  float minMipmapLevelClamp;
  /**
   * Upper end of the mipmap level range to clamp access to
   */
  float maxMipmapLevelClamp;
  /**
   * Disable any trilinear filtering optimizations.
   */
  int disableTrilinearOptimization;
};

extern "C" cudaError_t cudaCreateTextureObject_v115(
    cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc,
    const struct cudaTextureDesc_v115 *pTexDesc,
    const struct cudaResourceViewDesc *pResViewDesc) {
#if CUDA_VERSION >= 11070
  // Zero-init the full structure and then fill it in with the old-format data,
  // leaving new fields with the default value of 0.
  cudaTextureDesc texDesc = {};
  memcpy((void *)&texDesc, (void *)pTexDesc, sizeof(cudaTextureDesc_v115));
  return cudaCreateTextureObject(pTexObject, pResDesc, &texDesc, pResViewDesc);
#else
  return cudaCreateTextureObject(pTexObject, pResDesc,
                                 (cudaTextureDesc *)pTexDesc, pResViewDesc);
#endif
}

extern "C" cudaError_t cudaGetTextureObjectTextureDesc_v115(
    struct cudaTextureDesc_v115 *pTexDesc, cudaTextureObject_t texObject) {
#if CUDA_VERSION >= 11070
  cudaTextureDesc texDesc;
  // Provide the function with the correctly-sized buffer, and then copy the
  // fields the old version knew about.
  cudaError_t result = cudaGetTextureObjectTextureDesc(&texDesc, texObject);
  memcpy((void *)pTexDesc, (void *)&texDesc, sizeof(cudaTextureDesc_v115));
  return result;
#else
  return cudaGetTextureObjectTextureDesc((cudaTextureDesc *)pTexDesc,
                                         texObject);
#endif
}
	// ABI compatibility shims for CUDA-11.7.
	// Patch affected libraries with:
	// objcopy \
	// --redefine-sym cudaCreateTextureObject=cudaCreateTextureObject_v115 \
	// --redefine-sym cudaGetTextureObjectTextureDesc=cudaGetTextureObjectTextureDesc_v115 \
	// --redefine-sym cublasGetVersion_v2=cublasGetVersion_v2_v115 \
	// --redefine-sym cublasLtGetVersion=cublasLtGetVersion_v115 \
	// libnvinfer_static.a libcudnn_static.a
	//

	#include <string.h>

	#include <algorithm>

	#include "cublas.h"
	#include "cublasLt.h"
	#include "cuda.h"
	#include "cuda_runtime_api.h"

	// Tensorrt v7/v8 binaries we have now were built with the older version of
	// cuBLAS which encoded its own version differently. Until we update TensorRT we
	// must convert the new encoding scheme into something TRT can live with.

	// TensorRT static libraries are patched to call this `cublasGetVersion_v115`
	// shim instead of the normal cublasGetVersion_v2. The shim converts the version
	// from `MMmmpp` encoding into `MMmpp` expected by TensorRT.

	namespace {
	size_t ConvertVersionEncoding(size_t real_version) {
	size_t patch = real_version % 100;
	// Old encoding only had one digit available for the minor version, so it
	// can't get higher than 9.
	size_t minor = std::min<size_t>(9, (real_version / 100) % 100);
	size_t major = real_version / 10000;
	return major * 1000 + minor * 100 + patch;
	}
	} // namespace

	extern "C" cublasStatus_t cublasGetVersion_v2_v115(cublasHandle_t handle,
	int *version) {
	#if CUDA_VERSION >= 11070
	int real_version;
	cublasStatus_t result = cublasGetVersion_v2(handle, &real_version);
	*version = ConvertVersionEncoding(real_version);
	return result;
	#else
	// Just pass-through the call to cuBLAS.
	return cublasGetVersion_v2(handle, version);
	#endif
	}

	extern "C" size_t cublasLtGetVersion_v115(void) {
	#if CUDA_VERSION >= 11070
	return ConvertVersionEncoding(cublasLtGetVersion());
	#else
	return cublasLtGetVersion();
	#endif
	}

	// cudaTextureDesc as it was in CUDA-11.5
	struct __device_builtin__ cudaTextureDesc_v115 {
	/**
	* Texture address mode for up to 3 dimensions
	*/
	enum cudaTextureAddressMode addressMode[3];
	/**
	* Texture filter mode
	*/
	enum cudaTextureFilterMode filterMode;
	/**
	* Texture read mode
	*/
	enum cudaTextureReadMode readMode;
	/**
	* Perform sRGB->linear conversion during texture read
	*/
	int sRGB;
	/**
	* Texture Border Color
	*/
	float borderColor[4];
	/**
	* Indicates whether texture reads are normalized or not
	*/
	int normalizedCoords;
	/**
	* Limit to the anisotropy ratio
	*/
	unsigned int maxAnisotropy;
	/**
	* Mipmap filter mode
	*/
	enum cudaTextureFilterMode mipmapFilterMode;
	/**
	* Offset applied to the supplied mipmap level
	*/
	float mipmapLevelBias;
	/**
	* Lower end of the mipmap level range to clamp access to
	*/
	float minMipmapLevelClamp;
	/**
	* Upper end of the mipmap level range to clamp access to
	*/
	float maxMipmapLevelClamp;
	/**
	* Disable any trilinear filtering optimizations.
	*/
	int disableTrilinearOptimization;
	};

	extern "C" cudaError_t cudaCreateTextureObject_v115(
	cudaTextureObject_t pTexObject, const struct cudaResourceDesc pResDesc,
	const struct cudaTextureDesc_v115 *pTexDesc,
	const struct cudaResourceViewDesc *pResViewDesc) {
	#if CUDA_VERSION >= 11070
	// Zero-init the full structure and then fill it in with the old-format data,
	// leaving new fields with the default value of 0.
	cudaTextureDesc texDesc = {};
	memcpy((void )&texDesc, (void )pTexDesc, sizeof(cudaTextureDesc_v115));
	return cudaCreateTextureObject(pTexObject, pResDesc, &texDesc, pResViewDesc);
	#else
	return cudaCreateTextureObject(pTexObject, pResDesc,
	(cudaTextureDesc *)pTexDesc, pResViewDesc);
	#endif
	}

	extern "C" cudaError_t cudaGetTextureObjectTextureDesc_v115(
	struct cudaTextureDesc_v115 *pTexDesc, cudaTextureObject_t texObject) {
	#if CUDA_VERSION >= 11070
	cudaTextureDesc texDesc;
	// Provide the function with the correctly-sized buffer, and then copy the
	// fields the old version knew about.
	cudaError_t result = cudaGetTextureObjectTextureDesc(&texDesc, texObject);
	memcpy((void )pTexDesc, (void )&texDesc, sizeof(cudaTextureDesc_v115));
	return result;
	#else
	return cudaGetTextureObjectTextureDesc((cudaTextureDesc *)pTexDesc,
	texObject);
	#endif
	}