RedHatAI
/

quantization

Model card · Files and versions

danieldk HF Staff committed on Jul 4

Commit

d5fd23d

·

1 Parent(s): 8aa00a3

Temporarily disable ROCm

Files changed (1) hide show

build.toml +48 -48

build.toml CHANGED Viewed

@@ -46,35 +46,35 @@ src = [
     "gptq_marlin/marlin_template.h",
 ]
-[kernel.fp8_common_rocm]
-backend = "rocm"
-depends = ["torch"]
-rocm-archs = [
-    "gfx906",
-    "gfx908",
-    "gfx90a",
-    "gfx940",
-    "gfx941",
-    "gfx942",
-    "gfx1030",
-    "gfx1100",
-    "gfx1101",
-]
-include = ["."]
-src = [
-    "attention/attention_dtypes.h",
-    "attention/attention_generic.cuh",
-    "attention/dtype_bfloat16.cuh",
-    "attention/dtype_float16.cuh",
-    "attention/dtype_float32.cuh",
-    "attention/dtype_fp8.cuh",
-    "fp8/amd/quant_utils.cuh",
-    "fp8/common.cu",
-    "fp8/common.cuh",
-    "dispatch_utils.h",
-    "utils.cuh",
-    "vectorization.cuh",
-]
 [kernel.int8_common]
 backend = "cuda"
@@ -240,22 +240,22 @@ src = [
     "marlin/sparse/marlin_24_cuda_kernel.cu",
 ]
-[kernel.int8_common_rocm]
-backend = "rocm"
-depends = ["torch"]
-rocm-archs = [
-    "gfx906",
-    "gfx908",
-    "gfx90a",
-    "gfx940",
-    "gfx941",
-    "gfx942",
-    "gfx1030",
-    "gfx1100",
-    "gfx1101",
-]
-include = ["."]
-src = [
-    "compressed_tensors/int8_quant_kernels.cu",
-    "dispatch_utils.h",
-]

     "gptq_marlin/marlin_template.h",
 ]
+#[kernel.fp8_common_rocm]
+#backend = "rocm"
+#depends = ["torch"]
+#rocm-archs = [
+#    "gfx906",
+#    "gfx908",
+#    "gfx90a",
+#    "gfx940",
+#    "gfx941",
+#    "gfx942",
+#    "gfx1030",
+#    "gfx1100",
+#    "gfx1101",
+#]
+#include = ["."]
+#src = [
+#    "attention/attention_dtypes.h",
+#    "attention/attention_generic.cuh",
+#    "attention/dtype_bfloat16.cuh",
+#    "attention/dtype_float16.cuh",
+#    "attention/dtype_float32.cuh",
+#    "attention/dtype_fp8.cuh",
+#    "fp8/amd/quant_utils.cuh",
+#    "fp8/common.cu",
+#    "fp8/common.cuh",
+#    "dispatch_utils.h",
+#    "utils.cuh",
+#    "vectorization.cuh",
+#]
 [kernel.int8_common]
 backend = "cuda"
     "marlin/sparse/marlin_24_cuda_kernel.cu",
 ]
+#[kernel.int8_common_rocm]
+#backend = "rocm"
+#depends = ["torch"]
+#rocm-archs = [
+#    "gfx906",
+#    "gfx908",
+#    "gfx90a",
+#    "gfx940",
+#    "gfx941",
+#    "gfx942",
+#    "gfx1030",
+#    "gfx1100",
+#    "gfx1101",
+#]
+#include = ["."]
+#src = [
+#    "compressed_tensors/int8_quant_kernels.cu",
+#    "dispatch_utils.h",
+#]