Temporarily disable ROCm
Browse files
- build.toml +48 -48
build.toml
CHANGED
|
@@ -46,35 +46,35 @@ src = [
|
|
| 46 |
"gptq_marlin/marlin_template.h",
|
| 47 |
]
|
| 48 |
|
| 49 |
-
[kernel.fp8_common_rocm]
|
| 50 |
-
backend = "rocm"
|
| 51 |
-
depends = ["torch"]
|
| 52 |
-
rocm-archs = [
|
| 53 |
-
"gfx906",
|
| 54 |
-
"gfx908",
|
| 55 |
-
"gfx90a",
|
| 56 |
-
"gfx940",
|
| 57 |
-
"gfx941",
|
| 58 |
-
"gfx942",
|
| 59 |
-
"gfx1030",
|
| 60 |
-
"gfx1100",
|
| 61 |
-
"gfx1101",
|
| 62 |
-
]
|
| 63 |
-
include = ["."]
|
| 64 |
-
src = [
|
| 65 |
-
"attention/attention_dtypes.h",
|
| 66 |
-
"attention/attention_generic.cuh",
|
| 67 |
-
"attention/dtype_bfloat16.cuh",
|
| 68 |
-
"attention/dtype_float16.cuh",
|
| 69 |
-
"attention/dtype_float32.cuh",
|
| 70 |
-
"attention/dtype_fp8.cuh",
|
| 71 |
-
"fp8/amd/quant_utils.cuh",
|
| 72 |
-
"fp8/common.cu",
|
| 73 |
-
"fp8/common.cuh",
|
| 74 |
-
"dispatch_utils.h",
|
| 75 |
-
"utils.cuh",
|
| 76 |
-
"vectorization.cuh",
|
| 77 |
-
]
|
| 78 |
|
| 79 |
[kernel.int8_common]
|
| 80 |
backend = "cuda"
|
|
@@ -240,22 +240,22 @@ src = [
|
|
| 240 |
"marlin/sparse/marlin_24_cuda_kernel.cu",
|
| 241 |
]
|
| 242 |
|
| 243 |
-
[kernel.int8_common_rocm]
|
| 244 |
-
backend = "rocm"
|
| 245 |
-
depends = ["torch"]
|
| 246 |
-
rocm-archs = [
|
| 247 |
-
"gfx906",
|
| 248 |
-
"gfx908",
|
| 249 |
-
"gfx90a",
|
| 250 |
-
"gfx940",
|
| 251 |
-
"gfx941",
|
| 252 |
-
"gfx942",
|
| 253 |
-
"gfx1030",
|
| 254 |
-
"gfx1100",
|
| 255 |
-
"gfx1101",
|
| 256 |
-
]
|
| 257 |
-
include = ["."]
|
| 258 |
-
src = [
|
| 259 |
-
"compressed_tensors/int8_quant_kernels.cu",
|
| 260 |
-
"dispatch_utils.h",
|
| 261 |
-
]
|
|
|
|
| 46 |
"gptq_marlin/marlin_template.h",
|
| 47 |
]
|
| 48 |
|
| 49 |
+
#[kernel.fp8_common_rocm]
|
| 50 |
+
#backend = "rocm"
|
| 51 |
+
#depends = ["torch"]
|
| 52 |
+
#rocm-archs = [
|
| 53 |
+
# "gfx906",
|
| 54 |
+
# "gfx908",
|
| 55 |
+
# "gfx90a",
|
| 56 |
+
# "gfx940",
|
| 57 |
+
# "gfx941",
|
| 58 |
+
# "gfx942",
|
| 59 |
+
# "gfx1030",
|
| 60 |
+
# "gfx1100",
|
| 61 |
+
# "gfx1101",
|
| 62 |
+
#]
|
| 63 |
+
#include = ["."]
|
| 64 |
+
#src = [
|
| 65 |
+
# "attention/attention_dtypes.h",
|
| 66 |
+
# "attention/attention_generic.cuh",
|
| 67 |
+
# "attention/dtype_bfloat16.cuh",
|
| 68 |
+
# "attention/dtype_float16.cuh",
|
| 69 |
+
# "attention/dtype_float32.cuh",
|
| 70 |
+
# "attention/dtype_fp8.cuh",
|
| 71 |
+
# "fp8/amd/quant_utils.cuh",
|
| 72 |
+
# "fp8/common.cu",
|
| 73 |
+
# "fp8/common.cuh",
|
| 74 |
+
# "dispatch_utils.h",
|
| 75 |
+
# "utils.cuh",
|
| 76 |
+
# "vectorization.cuh",
|
| 77 |
+
#]
|
| 78 |
|
| 79 |
[kernel.int8_common]
|
| 80 |
backend = "cuda"
|
|
|
|
| 240 |
"marlin/sparse/marlin_24_cuda_kernel.cu",
|
| 241 |
]
|
| 242 |
|
| 243 |
+
#[kernel.int8_common_rocm]
|
| 244 |
+
#backend = "rocm"
|
| 245 |
+
#depends = ["torch"]
|
| 246 |
+
#rocm-archs = [
|
| 247 |
+
# "gfx906",
|
| 248 |
+
# "gfx908",
|
| 249 |
+
# "gfx90a",
|
| 250 |
+
# "gfx940",
|
| 251 |
+
# "gfx941",
|
| 252 |
+
# "gfx942",
|
| 253 |
+
# "gfx1030",
|
| 254 |
+
# "gfx1100",
|
| 255 |
+
# "gfx1101",
|
| 256 |
+
#]
|
| 257 |
+
#include = ["."]
|
| 258 |
+
#src = [
|
| 259 |
+
# "compressed_tensors/int8_quant_kernels.cu",
|
| 260 |
+
# "dispatch_utils.h",
|
| 261 |
+
#]
|