Text Generation
Transformers
Safetensors
llama
code
granite
Eval Results (legacy)
text-generation-inference
Instructions to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="royleibov/granite-3b-code-base-128k-ZipNN-Compressed")

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("royleibov/granite-3b-code-base-128k-ZipNN-Compressed")
model = AutoModelForCausalLM.from_pretrained("royleibov/granite-3b-code-base-128k-ZipNN-Compressed")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "royleibov/granite-3b-code-base-128k-ZipNN-Compressed"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "royleibov/granite-3b-code-base-128k-ZipNN-Compressed",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/royleibov/granite-3b-code-base-128k-ZipNN-Compressed
- SGLang
How to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "royleibov/granite-3b-code-base-128k-ZipNN-Compressed" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "royleibov/granite-3b-code-base-128k-ZipNN-Compressed",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "royleibov/granite-3b-code-base-128k-ZipNN-Compressed" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "royleibov/granite-3b-code-base-128k-ZipNN-Compressed",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use royleibov/granite-3b-code-base-128k-ZipNN-Compressed with Docker Model Runner:
docker model run hf.co/royleibov/granite-3b-code-base-128k-ZipNN-Compressed
| import os | |
| import subprocess | |
| import sys | |
| import argparse | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) | |
| RED = "\033[91m" | |
| YELLOW = "\033[93m" | |
| GREEN = "\033[92m" | |
| RESET = "\033[0m" | |
def check_and_install_zipnn():
    """Ensure the ``zipnn`` package is importable, installing it via pip if not.

    The import is attempted first; only on ``ImportError`` is
    ``pip install zipnn`` run with the current interpreter, after which the
    import is retried.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command fails.
        ImportError: If ``zipnn`` still cannot be imported after installation.
    """
    try:
        import zipnn  # noqa: F401  (imported only to probe availability)
    except ImportError:
        import importlib

        print("zipnn not found. Installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "zipnn"])
        # The package was installed after this interpreter started, so the
        # import system's cached directory listings may not include it yet.
        importlib.invalidate_caches()
        import zipnn  # noqa: F401
def decompress_file(input_file, delete=False, force=False, hf_cache=False):
    """Decompress a ZipNN ``.znn`` file into a sibling file without the suffix.

    The output path is ``input_file`` with the trailing ``.znn`` removed.
    If the input file does not exist, an error message is printed and the
    function returns without raising.

    Args:
        input_file: Path to the compressed file; must end with ``.znn``.
        delete: Remove the compressed file once it has been decompressed.
            Has no separate effect when ``hf_cache`` is set, because that
            mode removes the input itself.
        force: Overwrite an existing output file without prompting.
        hf_cache: Treat ``input_file`` as a symlink inside a Hugging Face
            cache snapshot: the decompressed data is moved onto the path the
            symlink points at, and the output name becomes a symlink to it.

    Raises:
        ValueError: If ``input_file`` does not end with ``.znn``.
        Exception: If reorganizing the Hugging Face cache fails (the
            underlying error is chained as ``__cause__``).
    """
    if not input_file.endswith(".znn"):
        raise ValueError("Input file does not have the '.znn' suffix")

    if not os.path.exists(input_file):
        print(f"Error: The file {input_file} does not exist.")
        return

    # Imported lazily, after the cheap argument checks, so that invalid input
    # is reported even when the optional dependency is not installed.
    import zipnn

    output_file = input_file[:-4]  # strip the ".znn" suffix
    if not force and os.path.exists(output_file):
        user_input = input(f"{output_file} already exists; overwrite (y/n)? ").strip().lower()
        if user_input not in ("yes", "y"):
            print(f"Skipping {input_file}...")
            return

    print(f"Decompressing {input_file}...")
    zpn = zipnn.ZipNN(is_streaming=True)
    with open(input_file, "rb") as infile:
        d_data = zpn.decompress(infile.read())
    # Open (and thereby truncate) the output only after decompression has
    # succeeded, so a failure does not destroy an existing output file.
    with open(output_file, "wb") as outfile:
        outfile.write(d_data)
    print(f"Decompressed {input_file} to {output_file}")

    if delete and not hf_cache:
        print(f"Deleting {input_file}...")
        os.remove(input_file)

    if hf_cache:
        # If the file is in the Hugging Face cache, fix the symlinks
        print(f"{YELLOW}Reorganizing Hugging Face cache...{RESET}")
        try:
            snapshot_path = os.path.dirname(input_file)
            # NOTE(review): the link target is built from input_file's
            # directory, so with a relative input_file the new symlink may
            # not resolve — confirm callers pass absolute paths.
            blob_name = os.path.join(snapshot_path, os.readlink(input_file))
            os.rename(output_file, blob_name)
            os.symlink(blob_name, output_file)
            if os.path.exists(input_file):
                os.remove(input_file)
        except Exception as e:
            raise Exception(f"Error reorganizing Hugging Face cache: {e}") from e
if __name__ == "__main__":
    # Command-line entry point: make sure zipnn is available, parse the
    # arguments, and decompress the single file given on the command line.
    check_and_install_zipnn()

    parser = argparse.ArgumentParser(description="Enter a file path to decompress.")
    parser.add_argument("input_file", type=str, help="Specify the path to the file to decompress.")
    parser.add_argument(
        "--delete",
        action="store_true",
        # decompress_file removes the compressed file *after* decompressing
        # it, so the help text describes that rather than "instead of".
        help="A flag that triggers deletion of the compressed file after it has been decompressed.",
    )
    parser.add_argument(
        "--force", action="store_true", help="A flag that forces overwriting when decompressing."
    )
    parser.add_argument(
        "--hf_cache",
        action="store_true",
        help="A flag that indicates if the file is in the Hugging Face cache.",
    )
    args = parser.parse_args()

    # Each flag is a store_true option (False unless given), matching the
    # defaults of decompress_file, so they can be forwarded directly.
    decompress_file(
        args.input_file,
        delete=args.delete,
        force=args.force,
        hf_cache=args.hf_cache,
    )