wenhuach committed
Commit 0e8eae2 · verified · 1 Parent(s): 8db9242

Update README.md

Files changed (1)
  1. README.md +9 -208

README.md CHANGED
@@ -8,7 +8,8 @@ datasets:
  
  ## Model Details
  
- This model is a mixed int4 model with group_size 128 and symmetric quantization of [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) generated by [intel/auto-round](https://github.com/intel/auto-round) via **auot-round-light**
+ This model is a mixed int4 model with group_size 128 and symmetric quantization of [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) generated by the [intel/auto-round](https://github.com/intel/auto-round) algorithm.
+ `mlp.gate` layers fall back to 16 bits to ensure the model runs successfully on vLLM.
  
  Please follow the license of the original model.
  
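To make the "group_size 128 and symmetric quantization" wording above concrete, here is a minimal round-to-nearest sketch of group-wise symmetric int4 weight quantization. It is illustrative only and is not the AutoRound algorithm itself (AutoRound tunes the rounding rather than using plain rounding); the function names and tensor shapes are hypothetical.

```python
import torch

def quantize_symmetric_int4(weight: torch.Tensor, group_size: int = 128):
    """Round-to-nearest group-wise symmetric int4 quantization (illustrative only)."""
    out_features, in_features = weight.shape
    assert in_features % group_size == 0, "in_features must be divisible by group_size"
    groups = weight.reshape(out_features, in_features // group_size, group_size)

    # Symmetric scheme: one scale per group of 128 weights, zero-point fixed at 0.
    # int4 range is [-8, 7]; dividing by 7 keeps the grid symmetric around zero.
    scale = (groups.abs().amax(dim=-1, keepdim=True) / 7.0).clamp_min(1e-8)
    q = torch.round(groups / scale).clamp_(-8, 7).to(torch.int8)
    return q.reshape(out_features, in_features), scale.squeeze(-1)

def dequantize_symmetric_int4(q: torch.Tensor, scale: torch.Tensor, group_size: int = 128):
    """Map int4 codes back to float using the per-group scales."""
    out_features, in_features = q.shape
    groups = q.reshape(out_features, in_features // group_size, group_size).float()
    return (groups * scale.unsqueeze(-1)).reshape(out_features, in_features)
```

Each group of 128 weights shares one scale, and "symmetric" means the zero-point is fixed at 0; the published checkpoint was produced by AutoRound, which additionally optimizes the rounding, so the sketch is only meant to explain the terminology.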
@@ -65,212 +66,6 @@ for i, prompt in enumerate(prompts):
      print(f"Generated: {decoded_outputs[i]}")
      print("-" * 50)
  
- """
- Prompt: Write a quick sort algorithm.
- Generated: Here's a Quick Sort implementation in Python:
-
- ```python
- def quicksort(arr):
-     """
-     Quick Sort algorithm implementation
-
-     Args:
-         arr: List of comparable elements
-
-     Returns:
-         Sorted list
-     """
-     # Base case: arrays with 0 or 1 element are already sorted
-     if len(arr) <= 1:
-         return arr
-
-     # Choose pivot (using middle element)
-     pivot = arr[len(arr) // 2]
-
-     # Partition array into three parts
-     left = [x for x in arr if x < pivot]    # Elements less than pivot
-     middle = [x for x in arr if x == pivot] # Elements equal to pivot
-     right = [x for x in arr if x > pivot]   # Elements greater than pivot
-
-     # Recursively sort left and right partitions, then combine
-     return quicksort(left) + middle + quicksort(right)
-
- # Alternative in-place version (more memory efficient)
- def quicksort_inplace(arr, low=0, high=None):
-     """
-     In-place Quick Sort implementation
-
-     Args:
-         arr: List to be sorted in-place
-         low: Starting index
-         high: Ending index
-     """
-     if high is None:
-         high = len(arr) - 1
-
-     if low < high:
-         # Partition the array and get pivot index
-         pivot_index = partition(arr, low, high)
-
-
-         # Recursively sort elements before and after partition
-         quicksort_inplace(arr, low, pivot_index - 1)
-         quicksort_inplace(arr, pivot_index + 1, high)
-
- def partition(arr, low, high):
-     """
-     Partition function for in-place quicksort
-     """
-     # Choose rightmost element as pivot
-     pivot = arr[high]
-
-     # Index of smaller element (indicates right position of pivot)
-     i = low - 1
-
-     for j in range(low, high):
-         # If current element is smaller than or equal to pivot
-         if arr[j] <= pivot:
-             i += 1
-             arr[i], arr[j] = arr[j], arr[i]  # Swap elements
-
-     # Place pivot in correct position
-     arr[i + 1], arr[high] = arr[high], arr[i + 1]
-     return i + 1
-
- # Example usage
- if __name__ == "__main__":
-     # Test the simple version
-     test_array
- --------------------------------------------------
- Prompt: Write a flappy bird.
- Generated: # Flappy Bird in PyGame
-
- Here's a complete implementation of Flappy Bird using PyGame:
-
- ```python
- import pygame
- import sys
- import random
-
- # Initialize pygame
- pygame.init()
-
- # Game constants
- WIDTH, HEIGHT = 400, 600
- FPS = 60
- GRAVITY = 0.25
- FLAP_STRENGTH = -5
- PIPE_SPEED = 3
- PIPE_GAP = 150
- PIPE_FREQUENCY = 1800  # milliseconds
- GROUND_HEIGHT = 100
- BIRD_SIZE = 30
-
- # Colors
- SKY_BLUE = (113, 197, 207)
- GREEN = (111, 196, 69)
- DARK_GREEN = (76, 145, 65)
- BROWN = (160, 120, 40)
- YELLOW = (255, 221, 45)
- RED = (231, 76, 60)
- WHITE = (255, 255, 255)
- BLACK = (0, 0, 0)
-
- # Set up the display
- screen = pygame.display.set_mode((WIDTH, HEIGHT))
- pygame.display.set_caption("Flappy Bird")
- clock = pygame.time.Clock()
-
- # Font setup
- font = pygame.font.SysFont(None, 36)
- small_font = pygame.font.SysFont(None, 24)
-
- class Bird:
-     def __init__(self):
-         self.x = WIDTH // 3
-         self.y = HEIGHT // 2
-         self.velocity = 0
-         self.alive = True
-         self.rotation = 0
-
-     def flap(self):
-         self.velocity = FLAP_STRENGTH
-
-     def update(self):
-         # Apply gravity
-         self.velocity += GRAVITY
-         self.y += self.velocity
-
-         # Rotate bird based on velocity
-         self.rotation = max(-30, min(self.velocity * 3, 90))
-
-         # Check if bird hits the ground or ceiling
-         if self.y >= HEIGHT - GROUND_HEIGHT - BIRD_SIZE//2:
-             self.y = HEIGHT - GROUND_HEIGHT - BIRD_SIZE//2
-             self.alive = False
-         if self.y <= 0:
-             self.y = 0
-             self.velocity = 0
-
-     def draw(self
- --------------------------------------------------
- Prompt: Write a llm quantization algorithm.
- Generated: Here's a comprehensive implementation of LLM quantization algorithms, including post-training quantization and QLoRA-style quantization:
-
- ```python
- import torch
- import torch.nn as nn
- from typing import Dict, Tuple, Optional
- import math
-
- class Quantizer:
-     """Base class for quantization operations"""
-
-     @staticmethod
-     def symmetric_quantize(tensor: torch.Tensor, bits: int = 8) -> Tuple[torch.Tensor, float]:
-         """
-         Symmetric quantization for weights
-         Returns quantized tensor and scale factor
-         """
-         max_val = tensor.abs().max()
-         scale = max_val / (2 ** (bits - 1) - 1)
-
-         # Quantize to integer values
-         quantized = torch.round(tensor / scale).clamp(-2**(bits-1), 2**(bits-1)-1)
-         return quantized.to(torch.int8), scale
-
-     @staticmethod
-     def asymmetric_quantize(tensor: torch.Tensor, bits: int = 8) -> Tuple[torch.Tensor, float, float]:
-         """
-         Asymmetric quantization for activations
-         Returns quantized tensor, scale, and zero point
-         """
-         min_val, max_val = tensor.min(), tensor.max()
-         scale = (max_val - min_val) / (2**bits - 1)
-         zero_point = torch.round(-min_val / scale).clamp(0, 2**bits-1)
-
-         # Quantize with zero point
-         quantized = torch.round(tensor / scale + zero_point).clamp(0, 2**bits-1)
-         return quantized.to(torch.uint8), scale, zero_point
-
-     @staticmethod
-     def dequantize(quantized: torch.Tensor, scale: float, zero_point: Optional[float] = None) -> torch.Tensor:
-         """Dequantize tensor back to floating point"""
-         if zero_point is not None:
-             return (quantized.float() - zero_point) * scale
-         else:
-             return quantized.float() * scale
-
- class NF4Quantizer:
-     """4-bit NormalFloat quantization (NF4)"""
-
-     def __init__(self):
-         # Pre-defined NF4 values normalized to [-1, 1]
-         self.norm_floats = torch.tensor([
-             -1.0, -0.6962, -0.5251, -0.3949, -0.2844,
- --------------------------------------------------
-
- """
  ~~~
  
  ### Generate the model
@@ -307,8 +102,14 @@ for n, m in block.named_modules():
  
      device_map.update({n: device})
  
+ layer_config = {}
+ for n, m in model.named_modules():
+     if "mlp.gate" in n:  # vLLM only supports 16 bits for this layer
+         layer_config[n] = {"bits": 16}
+
+
  autoround = AutoRound(
-     model=model, tokenizer=tokenizer, device_map=device_map, nsamples=512, dataset="github-code-clean")
+     model=model, tokenizer=tokenizer, device_map=device_map, nsamples=512, dataset="github-code-clean", layer_config=layer_config)
  autoround.quantize_and_save(format="auto_round", output_dir="./Qwen3-Coder-480B-A35B-Instruct-int4")
  
  ```
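Because the `mlp.gate` fallback above exists specifically so the quantized checkpoint can be served with vLLM, a minimal inference sketch may be useful. It assumes a vLLM build that can load the exported auto_round format and uses the local output_dir written by quantize_and_save() above; the tensor_parallel_size and sampling values are illustrative placeholders, not recommendations from the model card.

```python
from vllm import LLM, SamplingParams

# Illustrative settings: adjust tensor_parallel_size to the GPUs available;
# the model path is the output_dir produced by quantize_and_save() above.
llm = LLM(model="./Qwen3-Coder-480B-A35B-Instruct-int4", tensor_parallel_size=8)
sampling = SamplingParams(temperature=0.7, top_p=0.8, max_tokens=512)

outputs = llm.generate(["Write a quick sort algorithm."], sampling)
print(outputs[0].outputs[0].text)
```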