main.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. """
  2. extract factors the build is dependent on:
  3. [X] compute capability
  4. [ ] TODO: Q - What if we have multiple GPUs of different makes?
  5. - CUDA version
  6. - Software:
  7. - CPU-only: only CPU quantization functions (no optimizer, no matrix multiplication)
  8. - CuBLAS-LT: full-build 8-bit optimizer
  9. - no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)
  10. evaluation:
  11. - if paths faulty, return meaningful error
  12. - else:
  13. - determine CUDA version
  14. - determine capabilities
  15. - based on that set the default path
  16. """
  17. import ctypes
  18. from .paths import determine_cuda_runtime_lib_path
  19. def check_cuda_result(cuda, result_val):
  20. # 3. Check for CUDA errors
  21. if result_val != 0:
  22. error_str = ctypes.c_char_p()
  23. cuda.cuGetErrorString(result_val, ctypes.byref(error_str))
  24. print(f"CUDA exception! Error code: {error_str.value.decode()}")
  25. def get_cuda_version(cuda, cudart_path):
  26. # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION
  27. try:
  28. cudart = ctypes.CDLL(cudart_path)
  29. except OSError:
  30. # TODO: shouldn't we error or at least warn here?
  31. print(f'ERROR: libcudart.so could not be read from path: {cudart_path}!')
  32. return None
  33. version = ctypes.c_int()
  34. check_cuda_result(cuda, cudart.cudaRuntimeGetVersion(ctypes.byref(version)))
  35. version = int(version.value)
  36. major = version//1000
  37. minor = (version-(major*1000))//10
  38. if major < 11:
  39. print('CUDA SETUP: CUDA version lower than 11 are currently not supported for LLM.int8(). You will be only to use 8-bit optimizers and quantization routines!!')
  40. return f'{major}{minor}'
  41. def get_cuda_lib_handle():
  42. # 1. find libcuda.so library (GPU driver) (/usr/lib)
  43. try:
  44. cuda = ctypes.CDLL("libcuda.so")
  45. except OSError:
  46. # TODO: shouldn't we error or at least warn here?
  47. print('CUDA SETUP: WARNING! libcuda.so not found! Do you have a CUDA driver installed? If you are on a cluster, make sure you are on a CUDA machine!')
  48. return None
  49. check_cuda_result(cuda, cuda.cuInit(0))
  50. return cuda
  51. def get_compute_capabilities(cuda):
  52. """
  53. 1. find libcuda.so library (GPU driver) (/usr/lib)
  54. init_device -> init variables -> call function by reference
  55. 2. call extern C function to determine CC
  56. (https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html)
  57. 3. Check for CUDA errors
  58. https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
  59. # bits taken from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
  60. """
  61. nGpus = ctypes.c_int()
  62. cc_major = ctypes.c_int()
  63. cc_minor = ctypes.c_int()
  64. device = ctypes.c_int()
  65. check_cuda_result(cuda, cuda.cuDeviceGetCount(ctypes.byref(nGpus)))
  66. ccs = []
  67. for i in range(nGpus.value):
  68. check_cuda_result(cuda, cuda.cuDeviceGet(ctypes.byref(device), i))
  69. ref_major = ctypes.byref(cc_major)
  70. ref_minor = ctypes.byref(cc_minor)
  71. # 2. call extern C function to determine CC
  72. check_cuda_result(
  73. cuda, cuda.cuDeviceComputeCapability(ref_major, ref_minor, device)
  74. )
  75. ccs.append(f"{cc_major.value}.{cc_minor.value}")
  76. return ccs
  77. # def get_compute_capability()-> Union[List[str, ...], None]: # FIXME: error
  78. def get_compute_capability(cuda):
  79. """
  80. Extracts the highest compute capbility from all available GPUs, as compute
  81. capabilities are downwards compatible. If no GPUs are detected, it returns
  82. None.
  83. """
  84. ccs = get_compute_capabilities(cuda)
  85. if ccs is not None:
  86. # TODO: handle different compute capabilities; for now, take the max
  87. return ccs[-1]
  88. return None
  89. def evaluate_cuda_setup():
  90. print('')
  91. print('='*35 + 'BUG REPORT' + '='*35)
  92. print('Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
  93. print('For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link')
  94. print('='*80)
  95. return "libbitsandbytes_cuda116.dll" # $$$
  96. binary_name = "libbitsandbytes_cpu.so"
  97. #if not torch.cuda.is_available():
  98. #print('No GPU detected. Loading CPU library...')
  99. #return binary_name
  100. cudart_path = determine_cuda_runtime_lib_path()
  101. if cudart_path is None:
  102. print(
  103. "WARNING: No libcudart.so found! Install CUDA or the cudatoolkit package (anaconda)!"
  104. )
  105. return binary_name
  106. print(f"CUDA SETUP: CUDA runtime path found: {cudart_path}")
  107. cuda = get_cuda_lib_handle()
  108. cc = get_compute_capability(cuda)
  109. print(f"CUDA SETUP: Highest compute capability among GPUs detected: {cc}")
  110. cuda_version_string = get_cuda_version(cuda, cudart_path)
  111. if cc == '':
  112. print(
  113. "WARNING: No GPU detected! Check your CUDA paths. Processing to load CPU-only library..."
  114. )
  115. return binary_name
  116. # 7.5 is the minimum CC vor cublaslt
  117. has_cublaslt = cc in ["7.5", "8.0", "8.6"]
  118. # TODO:
  119. # (1) CUDA missing cases (no CUDA installed by CUDA driver (nvidia-smi accessible)
  120. # (2) Multiple CUDA versions installed
  121. # we use ls -l instead of nvcc to determine the cuda version
  122. # since most installations will have the libcudart.so installed, but not the compiler
  123. print(f'CUDA SETUP: Detected CUDA version {cuda_version_string}')
  124. def get_binary_name():
  125. "if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so"
  126. bin_base_name = "libbitsandbytes_cuda"
  127. if has_cublaslt:
  128. return f"{bin_base_name}{cuda_version_string}.so"
  129. else:
  130. return f"{bin_base_name}{cuda_version_string}_nocublaslt.so"
  131. binary_name = get_binary_name()
  132. return binary_name