TomBombadyl committed on
Commit
55475f8
·
verified ·
1 Parent(s): 09811a8

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +2 -0
handler.py CHANGED
@@ -125,6 +125,7 @@ class EndpointHandler:
125
  trust_remote_code=True,
126
  device_map=device_map,
127
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 
128
  low_cpu_mem_usage=True,
129
  offload_folder=offload_folder if device_map == "auto" else None,
130
  max_memory=max_memory,
@@ -144,6 +145,7 @@ class EndpointHandler:
144
  model_path,
145
  trust_remote_code=True,
146
  torch_dtype=torch.float32,
 
147
  low_cpu_mem_usage=True,
148
  )
149
  logger.info("Successfully loaded with FP32 on CPU")
 
125
  trust_remote_code=True,
126
  device_map=device_map,
127
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
128
+ quantization_config=None, # Disable model's built-in quantization
129
  low_cpu_mem_usage=True,
130
  offload_folder=offload_folder if device_map == "auto" else None,
131
  max_memory=max_memory,
 
145
  model_path,
146
  trust_remote_code=True,
147
  torch_dtype=torch.float32,
148
+ quantization_config=None, # Disable model's built-in quantization
149
  low_cpu_mem_usage=True,
150
  )
151
  logger.info("Successfully loaded with FP32 on CPU")