TomBombadyl committed on
Commit
09811a8
·
verified ·
1 Parent(s): d611c13

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +7 -2
handler.py CHANGED
@@ -87,11 +87,13 @@ class EndpointHandler:
87
  model = None
88
  quantization_config = None
89
 
90
- # Attempt 1: Try with 8-bit quantization
91
  if torch.cuda.is_available():
92
  try:
 
 
93
  from transformers import BitsAndBytesConfig
94
- logger.info("Attempting to load with 8-bit quantization...")
95
 
96
  bnb_config = BitsAndBytesConfig(load_in_8bit=True)
97
 
@@ -107,6 +109,9 @@ class EndpointHandler:
107
  logger.info("Successfully loaded with 8-bit quantization")
108
  quantization_config = "8-bit"
109
 
 
 
 
110
  except Exception as e:
111
  logger.warning(f"8-bit quantization failed: {str(e)}")
112
  logger.info("Falling back to FP16 without quantization...")
 
87
  model = None
88
  quantization_config = None
89
 
90
+ # Attempt 1: Try with 8-bit quantization (if bitsandbytes is available)
91
  if torch.cuda.is_available():
92
  try:
93
+ # Check if bitsandbytes is available
94
+ import bitsandbytes
95
  from transformers import BitsAndBytesConfig
96
+ logger.info("bitsandbytes available, attempting 8-bit quantization...")
97
 
98
  bnb_config = BitsAndBytesConfig(load_in_8bit=True)
99
 
 
109
  logger.info("Successfully loaded with 8-bit quantization")
110
  quantization_config = "8-bit"
111
 
112
+ except ImportError as e:
113
+ logger.info(f"bitsandbytes not available ({str(e)}), skipping quantization...")
114
+ model = None
115
  except Exception as e:
116
  logger.warning(f"8-bit quantization failed: {str(e)}")
117
  logger.info("Falling back to FP16 without quantization...")