Update README.md
Browse files
README.md
CHANGED
|
@@ -8,7 +8,8 @@ datasets:
|
|
| 8 |
|
| 9 |
## Model Details
|
| 10 |
|
| 11 |
-
This model is a mixed int4 model with group_size 128 and symmetric quantization of [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) generated by [intel/auto-round](https://github.com/intel/auto-round)
|
|
|
|
| 12 |
|
| 13 |
Please follow the license of the original model.
|
| 14 |
|
|
@@ -65,212 +66,6 @@ for i, prompt in enumerate(prompts):
|
|
| 65 |
print(f"Generated: {decoded_outputs[i]}")
|
| 66 |
print("-" * 50)
|
| 67 |
|
| 68 |
-
"""
|
| 69 |
-
Prompt: Write a quick sort algorithm.
|
| 70 |
-
Generated: Here's a Quick Sort implementation in Python:
|
| 71 |
-
|
| 72 |
-
```python
|
| 73 |
-
def quicksort(arr):
|
| 74 |
-
"""
|
| 75 |
-
Quick Sort algorithm implementation
|
| 76 |
-
|
| 77 |
-
Args:
|
| 78 |
-
arr: List of comparable elements
|
| 79 |
-
|
| 80 |
-
Returns:
|
| 81 |
-
Sorted list
|
| 82 |
-
"""
|
| 83 |
-
# Base case: arrays with 0 or 1 element are already sorted
|
| 84 |
-
if len(arr) <= 1:
|
| 85 |
-
return arr
|
| 86 |
-
|
| 87 |
-
# Choose pivot (using middle element)
|
| 88 |
-
pivot = arr[len(arr) // 2]
|
| 89 |
-
|
| 90 |
-
# Partition array into three parts
|
| 91 |
-
left = [x for x in arr if x < pivot] # Elements less than pivot
|
| 92 |
-
middle = [x for x in arr if x == pivot] # Elements equal to pivot
|
| 93 |
-
right = [x for x in arr if x > pivot] # Elements greater than pivot
|
| 94 |
-
|
| 95 |
-
# Recursively sort left and right partitions, then combine
|
| 96 |
-
return quicksort(left) + middle + quicksort(right)
|
| 97 |
-
|
| 98 |
-
# Alternative in-place version (more memory efficient)
|
| 99 |
-
def quicksort_inplace(arr, low=0, high=None):
|
| 100 |
-
"""
|
| 101 |
-
In-place Quick Sort implementation
|
| 102 |
-
|
| 103 |
-
Args:
|
| 104 |
-
arr: List to be sorted in-place
|
| 105 |
-
low: Starting index
|
| 106 |
-
high: Ending index
|
| 107 |
-
"""
|
| 108 |
-
if high is None:
|
| 109 |
-
high = len(arr) - 1
|
| 110 |
-
|
| 111 |
-
if low < high:
|
| 112 |
-
# Partition the array and get pivot index
|
| 113 |
-
pivot_index = partition(arr, low, high)
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
# Recursively sort elements before and after partition
|
| 117 |
-
quicksort_inplace(arr, low, pivot_index - 1)
|
| 118 |
-
quicksort_inplace(arr, pivot_index + 1, high)
|
| 119 |
-
|
| 120 |
-
def partition(arr, low, high):
|
| 121 |
-
"""
|
| 122 |
-
Partition function for in-place quicksort
|
| 123 |
-
"""
|
| 124 |
-
# Choose rightmost element as pivot
|
| 125 |
-
pivot = arr[high]
|
| 126 |
-
|
| 127 |
-
# Index of smaller element (indicates right position of pivot)
|
| 128 |
-
i = low - 1
|
| 129 |
-
|
| 130 |
-
for j in range(low, high):
|
| 131 |
-
# If current element is smaller than or equal to pivot
|
| 132 |
-
if arr[j] <= pivot:
|
| 133 |
-
i += 1
|
| 134 |
-
arr[i], arr[j] = arr[j], arr[i] # Swap elements
|
| 135 |
-
|
| 136 |
-
# Place pivot in correct position
|
| 137 |
-
arr[i + 1], arr[high] = arr[high], arr[i + 1]
|
| 138 |
-
return i + 1
|
| 139 |
-
|
| 140 |
-
# Example usage
|
| 141 |
-
if __name__ == "__main__":
|
| 142 |
-
# Test the simple version
|
| 143 |
-
test_array
|
| 144 |
-
--------------------------------------------------
|
| 145 |
-
Prompt: Write a flappy bird.
|
| 146 |
-
Generated: # Flappy Bird in PyGame
|
| 147 |
-
|
| 148 |
-
Here's a complete implementation of Flappy Bird using PyGame:
|
| 149 |
-
|
| 150 |
-
```python
|
| 151 |
-
import pygame
|
| 152 |
-
import sys
|
| 153 |
-
import random
|
| 154 |
-
|
| 155 |
-
# Initialize pygame
|
| 156 |
-
pygame.init()
|
| 157 |
-
|
| 158 |
-
# Game constants
|
| 159 |
-
WIDTH, HEIGHT = 400, 600
|
| 160 |
-
FPS = 60
|
| 161 |
-
GRAVITY = 0.25
|
| 162 |
-
FLAP_STRENGTH = -5
|
| 163 |
-
PIPE_SPEED = 3
|
| 164 |
-
PIPE_GAP = 150
|
| 165 |
-
PIPE_FREQUENCY = 1800 # milliseconds
|
| 166 |
-
GROUND_HEIGHT = 100
|
| 167 |
-
BIRD_SIZE = 30
|
| 168 |
-
|
| 169 |
-
# Colors
|
| 170 |
-
SKY_BLUE = (113, 197, 207)
|
| 171 |
-
GREEN = (111, 196, 69)
|
| 172 |
-
DARK_GREEN = (76, 145, 65)
|
| 173 |
-
BROWN = (160, 120, 40)
|
| 174 |
-
YELLOW = (255, 221, 45)
|
| 175 |
-
RED = (231, 76, 60)
|
| 176 |
-
WHITE = (255, 255, 255)
|
| 177 |
-
BLACK = (0, 0, 0)
|
| 178 |
-
|
| 179 |
-
# Set up the display
|
| 180 |
-
screen = pygame.display.set_mode((WIDTH, HEIGHT))
|
| 181 |
-
pygame.display.set_caption("Flappy Bird")
|
| 182 |
-
clock = pygame.time.Clock()
|
| 183 |
-
|
| 184 |
-
# Font setup
|
| 185 |
-
font = pygame.font.SysFont(None, 36)
|
| 186 |
-
small_font = pygame.font.SysFont(None, 24)
|
| 187 |
-
|
| 188 |
-
class Bird:
|
| 189 |
-
def __init__(self):
|
| 190 |
-
self.x = WIDTH // 3
|
| 191 |
-
self.y = HEIGHT // 2
|
| 192 |
-
self.velocity = 0
|
| 193 |
-
self.alive = True
|
| 194 |
-
self.rotation = 0
|
| 195 |
-
|
| 196 |
-
def flap(self):
|
| 197 |
-
self.velocity = FLAP_STRENGTH
|
| 198 |
-
|
| 199 |
-
def update(self):
|
| 200 |
-
# Apply gravity
|
| 201 |
-
self.velocity += GRAVITY
|
| 202 |
-
self.y += self.velocity
|
| 203 |
-
|
| 204 |
-
# Rotate bird based on velocity
|
| 205 |
-
self.rotation = max(-30, min(self.velocity * 3, 90))
|
| 206 |
-
|
| 207 |
-
# Check if bird hits the ground or ceiling
|
| 208 |
-
if self.y >= HEIGHT - GROUND_HEIGHT - BIRD_SIZE//2:
|
| 209 |
-
self.y = HEIGHT - GROUND_HEIGHT - BIRD_SIZE//2
|
| 210 |
-
self.alive = False
|
| 211 |
-
if self.y <= 0:
|
| 212 |
-
self.y = 0
|
| 213 |
-
self.velocity = 0
|
| 214 |
-
|
| 215 |
-
def draw(self
|
| 216 |
-
--------------------------------------------------
|
| 217 |
-
Prompt: Write a llm quantization algorithm.
|
| 218 |
-
Generated: Here's a comprehensive implementation of LLM quantization algorithms, including post-training quantization and QLoRA-style quantization:
|
| 219 |
-
|
| 220 |
-
```python
|
| 221 |
-
import torch
|
| 222 |
-
import torch.nn as nn
|
| 223 |
-
from typing import Dict, Tuple, Optional
|
| 224 |
-
import math
|
| 225 |
-
|
| 226 |
-
class Quantizer:
|
| 227 |
-
"""Base class for quantization operations"""
|
| 228 |
-
|
| 229 |
-
@staticmethod
|
| 230 |
-
def symmetric_quantize(tensor: torch.Tensor, bits: int = 8) -> Tuple[torch.Tensor, float]:
|
| 231 |
-
"""
|
| 232 |
-
Symmetric quantization for weights
|
| 233 |
-
Returns quantized tensor and scale factor
|
| 234 |
-
"""
|
| 235 |
-
max_val = tensor.abs().max()
|
| 236 |
-
scale = max_val / (2 ** (bits - 1) - 1)
|
| 237 |
-
|
| 238 |
-
# Quantize to integer values
|
| 239 |
-
quantized = torch.round(tensor / scale).clamp(-2**(bits-1), 2**(bits-1)-1)
|
| 240 |
-
return quantized.to(torch.int8), scale
|
| 241 |
-
|
| 242 |
-
@staticmethod
|
| 243 |
-
def asymmetric_quantize(tensor: torch.Tensor, bits: int = 8) -> Tuple[torch.Tensor, float, float]:
|
| 244 |
-
"""
|
| 245 |
-
Asymmetric quantization for activations
|
| 246 |
-
Returns quantized tensor, scale, and zero point
|
| 247 |
-
"""
|
| 248 |
-
min_val, max_val = tensor.min(), tensor.max()
|
| 249 |
-
scale = (max_val - min_val) / (2**bits - 1)
|
| 250 |
-
zero_point = torch.round(-min_val / scale).clamp(0, 2**bits-1)
|
| 251 |
-
|
| 252 |
-
# Quantize with zero point
|
| 253 |
-
quantized = torch.round(tensor / scale + zero_point).clamp(0, 2**bits-1)
|
| 254 |
-
return quantized.to(torch.uint8), scale, zero_point
|
| 255 |
-
|
| 256 |
-
@staticmethod
|
| 257 |
-
def dequantize(quantized: torch.Tensor, scale: float, zero_point: Optional[float] = None) -> torch.Tensor:
|
| 258 |
-
"""Dequantize tensor back to floating point"""
|
| 259 |
-
if zero_point is not None:
|
| 260 |
-
return (quantized.float() - zero_point) * scale
|
| 261 |
-
else:
|
| 262 |
-
return quantized.float() * scale
|
| 263 |
-
|
| 264 |
-
class NF4Quantizer:
|
| 265 |
-
"""4-bit NormalFloat quantization (NF4)"""
|
| 266 |
-
|
| 267 |
-
def __init__(self):
|
| 268 |
-
# Pre-defined NF4 values normalized to [-1, 1]
|
| 269 |
-
self.norm_floats = torch.tensor([
|
| 270 |
-
-1.0, -0.6962, -0.5251, -0.3949, -0.2844,
|
| 271 |
-
--------------------------------------------------
|
| 272 |
-
|
| 273 |
-
"""
|
| 274 |
~~~
|
| 275 |
|
| 276 |
### Generate the model
|
|
@@ -307,8 +102,14 @@ for n, m in block.named_modules():
|
|
| 307 |
|
| 308 |
device_map.update({n: device})
|
| 309 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
autoround = AutoRound(
|
| 311 |
-
model=model, tokenizer=tokenizer, device_map=device_map, nsamples=512,dataset="github-code-clean")
|
| 312 |
autoround.quantize_and_save(format="auto_round", output_dir="./Qwen3-Coder-480B-A35B-Instruct-int4")
|
| 313 |
|
| 314 |
```
|
|
|
|
| 8 |
|
| 9 |
## Model Details
|
| 10 |
|
| 11 |
+
This model is a mixed int4 model with group_size 128 and symmetric quantization of [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) generated by the [intel/auto-round](https://github.com/intel/auto-round) algorithm.
|
| 12 |
+
`mlp.gate` layers fall back to 16 bits to ensure the model runs successfully on vLLM.
|
| 13 |
|
| 14 |
Please follow the license of the original model.
|
| 15 |
|
|
|
|
| 66 |
print(f"Generated: {decoded_outputs[i]}")
|
| 67 |
print("-" * 50)
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
~~~
|
| 70 |
|
| 71 |
### Generate the model
|
|
|
|
| 102 |
|
| 103 |
device_map.update({n: device})
|
| 104 |
|
| 105 |
+
layer_config = {}
|
| 106 |
+
for n, m in model.named_modules():
|
| 107 |
+
if "mlp.gate" in n: ## vLLM only supports 16-bit for this layer
|
| 108 |
+
layer_config[n] = {"bits": 16}
|
| 109 |
+
|
| 110 |
+
|
| 111 |
autoround = AutoRound(
|
| 112 |
+
model=model, tokenizer=tokenizer, device_map=device_map, nsamples=512,dataset="github-code-clean", layer_config=layer_config)
|
| 113 |
autoround.quantize_and_save(format="auto_round", output_dir="./Qwen3-Coder-480B-A35B-Instruct-int4")
|
| 114 |
|
| 115 |
```
|