alexmarques committed
Commit e43c9f6 · verified · 1 Parent(s): ba2c463

Update README.md

Files changed (1)
  1. README.md +17 -13
README.md CHANGED
@@ -108,6 +108,7 @@ vLLM aslo supports OpenAI-compatible serving. See the [documentation](https://do
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.transformers import oneshot
from llmcompressor.transformers.tracing import TraceableMistral3ForConditionalGeneration
+ from datasets import load_dataset, interleave_datasets
from PIL import Image
import io

@@ -116,7 +117,7 @@ vLLM aslo supports OpenAI-compatible serving. See the [documentation](https://do
model_name = model_stub.split("/")[-1]

num_text_samples = 1024
- num_vison_samples = 1024
+ num_vision_samples = 1024
max_seq_len = 8192

processor = AutoProcessor.from_pretrained(model_stub)
@@ -134,13 +135,14 @@ vLLM aslo supports OpenAI-compatible serving. See the [documentation](https://do
example["messages"],
add_generation_prompt=False,
),
- "images" = None,
+ "images": None,
}
tokenized_input = processor(**input, max_length=max_seq_len, truncation=True)
tokenized_input["pixel_values"] = tokenized_input.get("pixel_values", None)
tokenized_input["image_sizes"] = tokenized_input.get("image_sizes", None)
+ return tokenized_input

- dst = load_dataset("neuralmagic/calibration", name="LLM", split="train").select(num_text_samples)
+ dst = load_dataset("neuralmagic/calibration", name="LLM", split="train").select(range(num_text_samples))
dst = dst.map(preprocess_text, remove_columns=dst.column_names)

# Text + vision data subset
@@ -149,11 +151,11 @@ vLLM aslo supports OpenAI-compatible serving. See the [documentation](https://do
image = None
for message in example["messages"]:
message_content = []
- for content in message["content"]
+ for content in message["content"]:
if content["type"] == "text":
- message_content = {"type": "text", "text": content["text"]}
+ message_content.append({"type": "text", "text": content["text"]})
else:
- message_content = {"type": "image"}}
+ message_content.append({"type": "image"})
image = Image.open(io.BytesIO(content["image"]))

messages.append(
@@ -168,13 +170,14 @@ vLLM aslo supports OpenAI-compatible serving. See the [documentation](https://do
messages,
add_generation_prompt=False,
),
- "images" = image,
+ "images": image,
}
tokenized_input = processor(**input, max_length=max_seq_len, truncation=True)
tokenized_input["pixel_values"] = tokenized_input.get("pixel_values", None)
tokenized_input["image_sizes"] = tokenized_input.get("image_sizes", None)
+ return tokenized_input

- dsv = load_dataset("neuralmagic/calibration", name="VLLM", split="train").select(num_vision_samples)
+ dsv = load_dataset("neuralmagic/calibration", name="VLM", split="train").select(range(num_vision_samples))
dsv = dsv.map(preprocess_vision, remove_columns=dsv.column_names)

# Interleave subsets
@@ -182,9 +185,9 @@ vLLM aslo supports OpenAI-compatible serving. See the [documentation](https://do

# Configure the quantization algorithm and scheme
recipe = GPTQModifier(
- ignore=["language_model.lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"]
- sequential_targets=["MistralDecoderLayer"]
- dampening_frac=0.01
+ ignore=["language_model.lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+ sequential_targets=["MistralDecoderLayer"],
+ dampening_frac=0.01,
targets="Linear",
scheme="W4A16",
)
@@ -213,12 +216,13 @@ vLLM aslo supports OpenAI-compatible serving. See the [documentation](https://do
recipe=recipe,
max_seq_length=max_seq_len,
data_collator=data_collator,
+ num_samples=num_text_samples + num_vision_samples,
)

# Save to disk in compressed-tensors format
- save_path = model_name + "-quantized.w4a16
+ save_path = model_name + "-quantized.w4a16"
model.save_pretrained(save_path)
- tokenizer.save_pretrained(save_path)
+ processor.save_pretrained(save_path)
print(f"Model and tokenizer saved to: {save_path}")
```
</details>
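
The new hunks import `interleave_datasets` and keep the `# Interleave subsets` comment, but the interleaving call itself falls outside the changed lines. A plausible sketch of that step, reusing `dst` and `dsv` from the script above (the README's actual line may differ):

```python
from datasets import interleave_datasets

# Hypothetical reconstruction of the elided step: alternate text-only and
# text+vision calibration samples into a single dataset passed to oneshot().
ds = interleave_datasets([dst, dsv])
```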
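The hunk context references vLLM's OpenAI-compatible serving for the resulting checkpoint. A minimal offline-inference sketch, assuming vLLM with compressed-tensors support is installed and pointing at the directory written by `model.save_pretrained(save_path)`; the path below is a placeholder, not the README's actual model id:

```python
from vllm import LLM, SamplingParams

# Placeholder: point this at the local save_path directory produced above,
# e.g. "<model_name>-quantized.w4a16".
model_path = "<model_name>-quantized.w4a16"

llm = LLM(model=model_path)
params = SamplingParams(temperature=0.0, max_tokens=128)
outputs = llm.generate(["Summarize the W4A16 quantization scheme in one sentence."], params)
print(outputs[0].outputs[0].text)
```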