Update README.md
README.md CHANGED
@@ -8,7 +8,8 @@ datasets:
 
 ## Model Details
 
-This model is a mixed int4 model with group_size 128 and symmetric quantization of [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) generated by [intel/auto-round](https://github.com/intel/auto-round)
+This model is a mixed int4 model with group_size 128 and symmetric quantization of [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) generated by the [intel/auto-round](https://github.com/intel/auto-round) algorithm.
+The `mlp.gate` layers fall back to 16 bits to ensure the model runs successfully on vLLM.
 
 Please follow the license of the original model.
 
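As context for the description above, here is a minimal sketch of what symmetric, group-wise int4 quantization with group_size 128 does to a weight tensor. This is an illustration only: the function names are hypothetical and it is not AutoRound's implementation, which additionally tunes the rounding and clipping of each weight.

```python
import torch

def quantize_sym_int4(weight: torch.Tensor, group_size: int = 128):
    """Toy symmetric group-wise int4 quantization.

    Each contiguous group of `group_size` values shares one scale;
    values are rounded to integers in [-8, 7].
    """
    rows, cols = weight.shape
    w = weight.reshape(rows, cols // group_size, group_size)
    # Pick the scale so the largest magnitude in each group maps to 7
    scale = (w.abs().amax(dim=-1, keepdim=True) / 7.0).clamp_min(1e-8)
    q = torch.round(w / scale).clamp_(-8, 7).to(torch.int8)
    return q, scale

def dequantize(q: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    # Reverse the mapping to recover approximate fp weights
    return (q.float() * scale).reshape(q.shape[0], -1)

# Round-trip a random weight matrix and check the reconstruction error
w = torch.randn(16, 256)
q, scale = quantize_sym_int4(w)
print(f"max abs error: {(dequantize(q, scale) - w).abs().max():.4f}")
```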
@@ -65,212 +66,6 @@ for i, prompt in enumerate(prompts):
     print(f"Generated: {decoded_outputs[i]}")
     print("-" * 50)
 
-"""
-Prompt: Write a quick sort algorithm.
-Generated: Here's a Quick Sort implementation in Python:
-
-```python
-def quicksort(arr):
-    """
-    Quick Sort algorithm implementation
-
-    Args:
-        arr: List of comparable elements
-
-    Returns:
-        Sorted list
-    """
-    # Base case: arrays with 0 or 1 element are already sorted
-    if len(arr) <= 1:
-        return arr
-
-    # Choose pivot (using middle element)
-    pivot = arr[len(arr) // 2]
-
-    # Partition array into three parts
-    left = [x for x in arr if x < pivot]    # Elements less than pivot
-    middle = [x for x in arr if x == pivot] # Elements equal to pivot
-    right = [x for x in arr if x > pivot]   # Elements greater than pivot
-
-    # Recursively sort left and right partitions, then combine
-    return quicksort(left) + middle + quicksort(right)
-
-# Alternative in-place version (more memory efficient)
-def quicksort_inplace(arr, low=0, high=None):
-    """
-    In-place Quick Sort implementation
-
-    Args:
-        arr: List to be sorted in-place
-        low: Starting index
-        high: Ending index
-    """
-    if high is None:
-        high = len(arr) - 1
-
-    if low < high:
-        # Partition the array and get pivot index
-        pivot_index = partition(arr, low, high)
-
-
-        # Recursively sort elements before and after partition
-        quicksort_inplace(arr, low, pivot_index - 1)
-        quicksort_inplace(arr, pivot_index + 1, high)
-
-def partition(arr, low, high):
-    """
-    Partition function for in-place quicksort
-    """
-    # Choose rightmost element as pivot
-    pivot = arr[high]
-
-    # Index of smaller element (indicates right position of pivot)
-    i = low - 1
-
-    for j in range(low, high):
-        # If current element is smaller than or equal to pivot
-        if arr[j] <= pivot:
-            i += 1
-            arr[i], arr[j] = arr[j], arr[i]  # Swap elements
-
-    # Place pivot in correct position
-    arr[i + 1], arr[high] = arr[high], arr[i + 1]
-    return i + 1
-
-# Example usage
-if __name__ == "__main__":
-    # Test the simple version
-    test_array
---------------------------------------------------
-Prompt: Write a flappy bird.
-Generated: # Flappy Bird in PyGame
-
-Here's a complete implementation of Flappy Bird using PyGame:
-
-```python
-import pygame
-import sys
-import random
-
-# Initialize pygame
-pygame.init()
-
-# Game constants
-WIDTH, HEIGHT = 400, 600
-FPS = 60
-GRAVITY = 0.25
-FLAP_STRENGTH = -5
-PIPE_SPEED = 3
-PIPE_GAP = 150
-PIPE_FREQUENCY = 1800  # milliseconds
-GROUND_HEIGHT = 100
-BIRD_SIZE = 30
-
-# Colors
-SKY_BLUE = (113, 197, 207)
-GREEN = (111, 196, 69)
-DARK_GREEN = (76, 145, 65)
-BROWN = (160, 120, 40)
-YELLOW = (255, 221, 45)
-RED = (231, 76, 60)
-WHITE = (255, 255, 255)
-BLACK = (0, 0, 0)
-
-# Set up the display
-screen = pygame.display.set_mode((WIDTH, HEIGHT))
-pygame.display.set_caption("Flappy Bird")
-clock = pygame.time.Clock()
-
-# Font setup
-font = pygame.font.SysFont(None, 36)
-small_font = pygame.font.SysFont(None, 24)
-
-class Bird:
-    def __init__(self):
-        self.x = WIDTH // 3
-        self.y = HEIGHT // 2
-        self.velocity = 0
-        self.alive = True
-        self.rotation = 0
-
-    def flap(self):
-        self.velocity = FLAP_STRENGTH
-
-    def update(self):
-        # Apply gravity
-        self.velocity += GRAVITY
-        self.y += self.velocity
-
-        # Rotate bird based on velocity
-        self.rotation = max(-30, min(self.velocity * 3, 90))
-
-        # Check if bird hits the ground or ceiling
-        if self.y >= HEIGHT - GROUND_HEIGHT - BIRD_SIZE//2:
-            self.y = HEIGHT - GROUND_HEIGHT - BIRD_SIZE//2
-            self.alive = False
-        if self.y <= 0:
-            self.y = 0
-            self.velocity = 0
-
-    def draw(self
---------------------------------------------------
-Prompt: Write a llm quantization algorithm.
-Generated: Here's a comprehensive implementation of LLM quantization algorithms, including post-training quantization and QLoRA-style quantization:
-
-```python
-import torch
-import torch.nn as nn
-from typing import Dict, Tuple, Optional
-import math
-
-class Quantizer:
-    """Base class for quantization operations"""
-
-    @staticmethod
-    def symmetric_quantize(tensor: torch.Tensor, bits: int = 8) -> Tuple[torch.Tensor, float]:
-        """
-        Symmetric quantization for weights
-        Returns quantized tensor and scale factor
-        """
-        max_val = tensor.abs().max()
-        scale = max_val / (2 ** (bits - 1) - 1)
-
-        # Quantize to integer values
-        quantized = torch.round(tensor / scale).clamp(-2**(bits-1), 2**(bits-1)-1)
-        return quantized.to(torch.int8), scale
-
-    @staticmethod
-    def asymmetric_quantize(tensor: torch.Tensor, bits: int = 8) -> Tuple[torch.Tensor, float, float]:
-        """
-        Asymmetric quantization for activations
-        Returns quantized tensor, scale, and zero point
-        """
-        min_val, max_val = tensor.min(), tensor.max()
-        scale = (max_val - min_val) / (2**bits - 1)
-        zero_point = torch.round(-min_val / scale).clamp(0, 2**bits-1)
-
-        # Quantize with zero point
-        quantized = torch.round(tensor / scale + zero_point).clamp(0, 2**bits-1)
-        return quantized.to(torch.uint8), scale, zero_point
-
-    @staticmethod
-    def dequantize(quantized: torch.Tensor, scale: float, zero_point: Optional[float] = None) -> torch.Tensor:
-        """Dequantize tensor back to floating point"""
-        if zero_point is not None:
-            return (quantized.float() - zero_point) * scale
-        else:
-            return quantized.float() * scale
-
-class NF4Quantizer:
-    """4-bit NormalFloat quantization (NF4)"""
-
-    def __init__(self):
-        # Pre-defined NF4 values normalized to [-1, 1]
-        self.norm_floats = torch.tensor([
-            -1.0, -0.6962, -0.5251, -0.3949, -0.2844,
---------------------------------------------------
-
-"""
 ~~~
 
 ### Generate the model
@@ -307,8 +102,14 @@ for n, m in block.named_modules():
 
         device_map.update({n: device})
 
+layer_config = {}
+for n, m in model.named_modules():
+    if "mlp.gate" in n:  # vLLM only supports 16 bits for this layer
+        layer_config[n] = {"bits": 16}
+
+
 autoround = AutoRound(
-    model=model, tokenizer=tokenizer, device_map=device_map, nsamples=512,dataset="github-code-clean")
+    model=model, tokenizer=tokenizer, device_map=device_map, nsamples=512, dataset="github-code-clean", layer_config=layer_config)
 autoround.quantize_and_save(format="auto_round", output_dir="./Qwen3-Coder-480B-A35B-Instruct-int4")
 
 ```
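Since the `mlp.gate` fallback above exists specifically so vLLM can run the model, here is a hedged sketch of serving the exported checkpoint. The path is the `output_dir` from the script, `tensor_parallel_size` is a placeholder you must size to your own GPUs, and whether your installed vLLM version loads auto_round-format checkpoints out of the box is an assumption to verify.

```python
from vllm import LLM, SamplingParams

# Directory written by autoround.quantize_and_save(...) above
llm = LLM(
    model="./Qwen3-Coder-480B-A35B-Instruct-int4",
    tensor_parallel_size=8,  # placeholder: match your GPU count
    trust_remote_code=True,
)

params = SamplingParams(temperature=0.7, max_tokens=256)
outputs = llm.generate(["Write a quick sort algorithm."], params)
print(outputs[0].outputs[0].text)
```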