{
  "batchers": null,
  "cachers": null,
  "compilers": null,
  "distillers": null,
  "pruners": null,
  "quantizers": "llm-int8",
  "recoverers": null,
  "quant_llm-int8_compute_dtype": "bfloat16",
  "quant_llm-int8_double_quant": false,
  "quant_llm-int8_enable_fp32_cpu_offload": false,
  "quant_llm-int8_has_fp16_weight": false,
  "quant_llm-int8_quant_type": "fp4",
  "quant_llm-int8_threshold": 6.0,
  "quant_llm-int8_weight_bits": 8,
  "max_batch_size": 1,
  "device": "cuda",
  "cache_dir": "/tmp/models/tmpqd9n0q63",
  "task": "",
  "save_load_fn": "llm-int8",
  "save_load_fn_args": {},
  "api_key": null
}