# Makefile for converting a Hugging Face .safetensors model to .gguf (llama.cpp)

# ================================
# CONFIGURATION
# ================================
PYTHON         ?= python3
CONVERT_SCRIPT  = convert_hf_to_gguf.py
MODEL_DIR      ?= /path/to/your/model
OUTFILE        ?= model.gguf
OUTTYPE        ?= f16
QUANT_TYPE     ?= Q4_K_M
QUANT_OUT      ?= model-$(QUANT_TYPE).gguf
# ================================
# TARGETS
# ================================
.PHONY: clone deps convert quantize cuda clean help

# 1. Clone llama.cpp
# (A `cd llama.cpp` after the clone would be a no-op here: each recipe line
# runs in its own shell, so the directory change would not persist.)
clone:
	git clone https://github.com/ggerganov/llama.cpp.git

# 2. Install Python dependencies
deps:
	pip install -r llama.cpp/requirements.txt
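
# Optional (a suggestion, not part of the original Makefile): install the
# dependencies into an isolated virtual environment instead of the system
# Python:
#   python3 -m venv .venv && . .venv/bin/activate
#   pip install -r llama.cpp/requirements.txt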

# 3. Convert safetensors -> GGUF
convert:
	$(PYTHON) llama.cpp/$(CONVERT_SCRIPT) $(MODEL_DIR) --outfile $(OUTFILE) --outtype $(OUTTYPE)
# Example invocation (Windows path from the original notes):
#   python convert_hf_to_gguf.py D:\pytorch\safetensor-video\Qwen2.5-3B-Instruct --outfile model.gguf --outtype f16
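
# Usage sketch: the paths below are hypothetical; any of the configuration
# variables can be overridden on the make command line:
#   make convert MODEL_DIR=models/Qwen2.5-3B-Instruct OUTFILE=qwen2.5-3b-f16.gguf OUTTYPE=f16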

# 4. Quantize GGUF
# Note: recent llama.cpp releases renamed the binary from `quantize` to
# `llama-quantize`; a CMake build places it under build/bin/ (see the cuda
# target below).
quantize:
	./llama.cpp/build/bin/llama-quantize $(OUTFILE) $(QUANT_OUT) $(QUANT_TYPE)
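
# Usage sketch: pick a different quantization preset at invocation time;
# QUANT_OUT then defaults to a matching filename:
#   make quantize QUANT_TYPE=Q5_K_M   # writes model-Q5_K_M.gguf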

# 5. Build llama.cpp with CUDA support
# Note: the LLAMA_CUBLAS option is deprecated; current llama.cpp uses GGML_CUDA.
cuda:
	cd llama.cpp && mkdir -p build && cd build && cmake -DGGML_CUDA=ON .. && make -j
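
# Smoke test (assumes a recent llama.cpp checkout, where the CLI binary is
# built as build/bin/llama-cli; older trees used ./main instead):
#   ./llama.cpp/build/bin/llama-cli -m model-Q4_K_M.gguf -p "Hello" -n 32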

# 6. Clean
clean:
	rm -f $(OUTFILE) $(QUANT_OUT)

# 7. Help
help:
	@echo "Usage:"
	@echo "  make clone     - Clone llama.cpp repo"
	@echo "  make deps      - Install Python dependencies"
	@echo "  make convert   - Convert Hugging Face safetensors -> GGUF"
	@echo "  make quantize  - Quantize GGUF model (default: Q4_K_M)"
	@echo "  make cuda      - Build llama.cpp with CUDA support"
	@echo "  make clean     - Remove converted/quantized files"
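
# 8. Convenience target (an addition, not in the upstream snippet): run the
# full pipeline in order. Note that `clone` fails if llama.cpp/ already
# exists; in that case run `make deps convert quantize` instead.
.PHONY: all
all: clone deps convert quantize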