#!/bin/bash
# MLX Server Launcher for Dragon M3 Ultra
# Created: January 2025 for MLX 0.26+
# Supports local/remote models with full parameter control
# Text formatting
# ANSI SGR escape sequences for styling terminal output. They are kept as
# literal backslash sequences (not raw ESC bytes), so they only take effect
# when printed through `echo -e` or `printf '%b'`.
RESET='\033[0m'
BOLD='\033[1m'
RED='\033[31m'
GREEN='\033[32m'
YELLOW='\033[33m'
BLUE='\033[34m'
MAGENTA='\033[35m'
CYAN='\033[36m'
# Detect system specs
# Both values come from sysctl; when a lookup fails the fallback after `||`
# is used instead (0 for the memory size, "Unknown" for the CPU brand).
CPU_BRAND=$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo "Unknown")
TOTAL_MEMORY=$(sysctl -n hw.memsize 2>/dev/null || echo 0)
# 1073741824 = 1024^3, i.e. integer division from bytes down to whole GiB.
TOTAL_MEMORY_GB=$((TOTAL_MEMORY / 1073741824))

# Check if running on M3 Ultra: either the brand string says so, or the
# machine has at least 400 GB of memory.
IS_M3_ULTRA=false
if [[ "$CPU_BRAND" == *"M3 Ultra"* || "$TOTAL_MEMORY_GB" -ge 400 ]]; then
  IS_M3_ULTRA=true
  echo -e "${BOLD}${MAGENTA}🐉 Dragon M3 Ultra detected! (${TOTAL_MEMORY_GB}GB RAM)${RESET}"
fi
# Launcher banner: a title framed by two identical rules, then a one-line
# description followed by a blank line.
banner_rule="${BOLD}${BLUE}=====================================${RESET}"
echo -e "${banner_rule}\n\
${BOLD}${BLUE} MLX Server Launcher v1.0 ${RESET}\n\
${banner_rule}"
echo -e "Launch MLX model server with custom parameters\n"
# Default values
# Each DEFAULT_* is the fallback for the matching interactive prompt.

# Model and network
DEFAULT_MODEL='/Users/polyversai/.lmstudio/models/LibraxisAI/c4ai-command-a-03-2025-q5-mlx'
DEFAULT_HOST='0.0.0.0'
DEFAULT_PORT='12345'

# Sampling
DEFAULT_TEMP='0.7'
DEFAULT_TOP_P='0.95'
DEFAULT_TOP_K='0'
DEFAULT_MIN_P='0.0'
DEFAULT_MAX_TOKENS='2048'

# Logging
DEFAULT_LOG_LEVEL='INFO'
# Get model path

#######################################
# Replace a leading "~" or "~/" with $HOME. A value held in a variable is
# never tilde-expanded by the shell, so "~/models/x" would otherwise fail
# every filesystem test.
# Arguments: $1 - path as typed by the user
# Outputs:   the expanded path on stdout
#######################################
expand_home_prefix() {
  case "$1" in
    "~") printf '%s\n' "$HOME" ;;
    "~/"*) printf '%s\n' "${HOME}/${1:2}" ;;
    *) printf '%s\n' "$1" ;;
  esac
}

#######################################
# Classify a model reference.
# Arguments: $1 - model path or repo id
# Outputs:   "local"   - an existing directory
#            "missing" - written like a filesystem path but not a directory
#            "remote"  - anything else (e.g. an "org/name" repo id)
#######################################
classify_model_path() {
  local candidate=$1
  if [[ -d "$candidate" ]]; then
    printf 'local\n'
    return 0
  fi
  case "$candidate" in
    /* | ./* | ../* | "~"*) printf 'missing\n' ;;
    *) printf 'remote\n' ;;
  esac
}

echo -e "${BOLD}Model path (local or HF repo):${RESET}"
echo -e "(Default: ${DEFAULT_MODEL})"
echo -e "${CYAN}Examples:${RESET}"
echo -e " Local: /Users/polyversai/.lmstudio/models/mlx-community/model-name"
echo -e " HF: mlx-community/Llama-3.2-3B-Instruct-4bit"
# `read` is deliberately used without -r here: a pasted path with
# backslash-escaped spaces ("My\ Models") is unescaped into the real path.
read -p "> " MODEL_PATH
MODEL_PATH=${MODEL_PATH:-$DEFAULT_MODEL}
MODEL_PATH=$(expand_home_prefix "$MODEL_PATH")

# Report what was selected. A path-like value that does not exist is most
# likely a typo, so it gets a warning instead of a green "remote" check mark.
case "$(classify_model_path "$MODEL_PATH")" in
  local)
    echo -e "${GREEN}✓ Local model detected: ${MODEL_PATH}${RESET}"
    ;;
  missing)
    echo -e "${YELLOW}⚠ Local directory not found: ${MODEL_PATH} (passing it to the server as-is)${RESET}"
    ;;
  *)
    echo -e "${GREEN}✓ Remote model specified: ${MODEL_PATH}${RESET}"
    ;;
esac
# Network configuration
# Ask for the bind address, then the port; an empty answer selects the default.
echo -e "\n${BOLD}Host IP address:${RESET}\n\
(Default: ${DEFAULT_HOST} - accessible from network)\n\
Use 127.0.0.1 for localhost only"
read -p "> " HOST
: "${HOST:=$DEFAULT_HOST}"

echo -e "\n${BOLD}Port number:${RESET}\n\
(Default: ${DEFAULT_PORT})"
read -p "> " PORT
: "${PORT:=$DEFAULT_PORT}"
# Sampling parameters

# Read one answer from stdin into the variable named by $1, storing $2
# instead when the answer is empty.
ask_with_default() {
  local reply
  read -p "> " reply
  printf -v "$1" '%s' "${reply:-$2}"
}

echo -e "\n${BOLD}${CYAN}=== Sampling Parameters ===${RESET}"

echo -e "\n${BOLD}Temperature (creativity):${RESET}\n\
Range: 0.0-2.0 (Default: ${DEFAULT_TEMP})\n\
${YELLOW}0.0 = deterministic, 1.0 = balanced, 2.0 = very creative${RESET}"
ask_with_default TEMP "$DEFAULT_TEMP"

echo -e "\n${BOLD}Top-p (nucleus sampling):${RESET}\n\
Range: 0.0-1.0 (Default: ${DEFAULT_TOP_P})\n\
${YELLOW}Lower = more focused, Higher = more diverse${RESET}"
ask_with_default TOP_P "$DEFAULT_TOP_P"

echo -e "\n${BOLD}Top-k (vocabulary limit):${RESET}\n\
Default: ${DEFAULT_TOP_K} (0 = disabled)\n\
${YELLOW}Limits selection to top K tokens${RESET}"
ask_with_default TOP_K "$DEFAULT_TOP_K"

echo -e "\n${BOLD}Min-p (minimum probability):${RESET}\n\
Range: 0.0-1.0 (Default: ${DEFAULT_MIN_P})\n\
${YELLOW}0.0 = disabled, higher = filter low probability tokens${RESET}"
ask_with_default MIN_P "$DEFAULT_MIN_P"

echo -e "\n${BOLD}Max tokens per response:${RESET}\n\
(Default: ${DEFAULT_MAX_TOKENS})"
# Extra hint, shown only when the M3 Ultra check succeeded.
if [[ "$IS_M3_ULTRA" == true ]]; then
  echo -e "${MAGENTA}Dragon can handle 8192+ tokens easily${RESET}"
fi
ask_with_default MAX_TOKENS "$DEFAULT_MAX_TOKENS"
# Optional adapter
echo -e "\n${BOLD}LoRA adapter path (optional):${RESET}"
echo -e "(Leave empty if not using adapters)"
# No -r on purpose: backslash-escaped spaces in a pasted path are unescaped.
read -p "> " ADAPTER_PATH

# Chat template args
echo -e "\n${BOLD}Chat template args (optional JSON):${RESET}"
echo -e "Example: {\"enable_thinking\":false}"
echo -e "(Leave empty for defaults)"
# -r keeps backslashes inside the JSON exactly as typed.
read -r -p "> " CHAT_TEMPLATE_ARGS

# Log level
echo -e "\n${BOLD}Log level:${RESET}"
echo -e "(Default: ${DEFAULT_LOG_LEVEL}, Options: DEBUG, INFO, WARNING, ERROR, CRITICAL)"
read -r -p "> " LOG_LEVEL
LOG_LEVEL=${LOG_LEVEL:-$DEFAULT_LOG_LEVEL}

#######################################
# Replace a leading "~" or "~/" with $HOME. Needed because the command is
# shell-quoted below, which (correctly) stops the shell from expanding "~".
# Arguments: $1 - path as typed by the user
# Outputs:   the expanded path on stdout
#######################################
expand_home_prefix() {
  case "$1" in
    "~") printf '%s\n' "$HOME" ;;
    "~/"*) printf '%s\n' "${HOME}/${1:2}" ;;
    *) printf '%s\n' "$1" ;;
  esac
}

#######################################
# Assemble the server command line.
# The arguments are collected in an array and then rendered with
# `printf %q`, so every value survives a later `eval` / `bash -c` as exactly
# one argument: spaces in paths, the double quotes of the JSON template
# args, and shell metacharacters are all preserved literally.
# Globals:   MODEL_PATH, HOST, PORT, TEMP, TOP_P, TOP_K, MIN_P, MAX_TOKENS,
#            LOG_LEVEL, ADAPTER_PATH, CHAT_TEMPLATE_ARGS (read)
#            SERVER_CMD (written) - shell-quoted, single-line command string
# Arguments: none
#######################################
build_server_cmd() {
  local -a args
  args=(
    uv run mlx_lm.server
    --model "$(expand_home_prefix "$MODEL_PATH")"
    --host "$HOST"
    --port "$PORT"
    --temp "$TEMP"
    --top-p "$TOP_P"
    --top-k "$TOP_K"
    --min-p "$MIN_P"
    --max-tokens "$MAX_TOKENS"
    --log-level "$LOG_LEVEL"
  )
  # Optional flags are added only when the user supplied a value.
  if [[ -n "$ADAPTER_PATH" ]]; then
    args+=(--adapter-path "$(expand_home_prefix "$ADAPTER_PATH")")
  fi
  if [[ -n "$CHAT_TEMPLATE_ARGS" ]]; then
    args+=(--chat-template-args "$CHAT_TEMPLATE_ARGS")
  fi

  SERVER_CMD=$(printf '%q ' "${args[@]}")
  SERVER_CMD=${SERVER_CMD% } # drop the separator left after the last word
}

# Build the command
build_server_cmd

# Print preview
echo -e "\n${BOLD}${YELLOW}Command Preview:${RESET}"
# printf '%s' rather than echo -e: the escaping in the command must be shown
# verbatim, not interpreted.
printf '%s\n' "$SERVER_CMD"
# Launch mode selection
echo -e "\n${BOLD}${GREEN}Launch mode:${RESET}"
echo -e "1. ${YELLOW}Foreground${RESET} - See logs in terminal (Ctrl+C to stop)"
echo -e "2. ${YELLOW}Background with logging${RESET} - Logs to logs/mlx-server-<timestamp>.log"
echo -e "3. ${YELLOW}Background detached${RESET} - Run with nohup"
echo -e "4. ${YELLOW}Just copy command${RESET} - Don't launch"
read -r -p "> " LAUNCH_MODE

#######################################
# Report a freshly started background server and record its PID.
# Globals:   GREEN, RESET (read)
# Arguments: $1 - PID of the server process
#            $2 - log file receiving its output
# Outputs:   status lines on stdout; writes the PID to logs/mlx-server.pid
#######################################
announce_background_server() {
  local pid=$1 log_file=$2
  echo -e "${GREEN}✓ Server started with PID: ${pid}${RESET}"
  echo -e "\nTo monitor: tail -f ${log_file}"
  echo -e "To stop: kill ${pid}"
  # Save PID for easy stopping
  printf '%s\n' "$pid" > logs/mlx-server.pid
}

# Create logs directory if needed (background modes only)
if [[ "$LAUNCH_MODE" == "2" || "$LAUNCH_MODE" == "3" ]]; then
  if ! mkdir -p logs; then
    echo -e "${RED}Cannot create the logs directory. Exiting.${RESET}" >&2
    exit 1
  fi
  LOG_FILE="logs/mlx-server-$(date +%Y%m%d-%H%M%S).log"
fi

# NOTE(review): SERVER_CMD is a string assembled from interactive answers and
# is re-parsed as shell code by `eval` / `bash -c` below. That is only safe
# when every answer embedded in it has been shell-quoted by the code that
# builds it.
case "$LAUNCH_MODE" in
  1)
    echo -e "\n${BOLD}${GREEN}Starting server in foreground...${RESET}"
    echo -e "${YELLOW}Press Ctrl+C to stop${RESET}\n"
    eval "$SERVER_CMD"
    ;;
  2)
    echo -e "\n${BOLD}${GREEN}Starting server in background...${RESET}"
    echo -e "Logs: ${LOG_FILE}"
    # `exec` replaces the background subshell with the server itself, so $!
    # is the server's PID and the advertised `kill` stops the server rather
    # than only a wrapper shell.
    eval "exec $SERVER_CMD" > "${LOG_FILE}" 2>&1 &
    SERVER_PID=$!
    announce_background_server "$SERVER_PID" "$LOG_FILE"
    ;;
  3)
    echo -e "\n${BOLD}${GREEN}Starting server with nohup...${RESET}"
    echo -e "Logs: ${LOG_FILE}"
    # Same `exec` trick: nohup -> bash -> server all share one PID.
    nohup bash -c "exec $SERVER_CMD" > "${LOG_FILE}" 2>&1 &
    SERVER_PID=$!
    announce_background_server "$SERVER_PID" "$LOG_FILE"
    ;;
  4)
    # Announce success only after pbcopy has actually succeeded. No trailing
    # newline is copied, so pasting into a terminal does not run the command
    # before the user presses Enter.
    if command -v pbcopy > /dev/null 2>&1 && printf '%s' "$SERVER_CMD" | pbcopy; then
      echo -e "\n${BOLD}${GREEN}Command copied to clipboard!${RESET}"
    else
      echo -e "\n${YELLOW}Clipboard unavailable (pbcopy failed). Copy the command manually:${RESET}"
      printf '%s\n' "$SERVER_CMD"
    fi
    ;;
  *)
    echo -e "\n${RED}Invalid choice. Exiting.${RESET}"
    exit 1
    ;;
esac
#######################################
# Print sample curl commands for talking to the server.
# Globals:   BOLD, BLUE, CYAN, MAGENTA, RESET, IS_M3_ULTRA (read)
# Arguments: $1 - host the server was bound to
#            $2 - port
#            $3 - temperature shown in the chat-completion example
# Outputs:   the examples, on stdout
#######################################
print_api_examples() {
  local host=$1 port=$2 temp=$3
  # 0.0.0.0 means "listen on every interface"; it is not an address a client
  # should connect to, so the examples use loopback instead.
  if [[ "$host" == "0.0.0.0" ]]; then
    host="127.0.0.1"
  fi
  local base_url="http://${host}:${port}"

  echo -e "\n${BOLD}${BLUE}=== API Usage Examples ===${RESET}"
  echo -e "\n${CYAN}1. Chat completion:${RESET}"
  echo -e "curl ${base_url}/v1/chat/completions \\"
  echo -e " -H \"Content-Type: application/json\" \\"
  echo -e " -d '{"
  echo -e " \"messages\": [{\"role\": \"user\", \"content\": \"Hello!\"}],"
  echo -e " \"temperature\": ${temp},"
  echo -e " \"max_tokens\": 100"
  echo -e " }'"
  echo -e "\n${CYAN}2. Check models:${RESET}"
  echo -e "curl ${base_url}/v1/models"
  echo -e "\n${CYAN}3. Health check:${RESET}"
  echo -e "curl ${base_url}/health"
  if [[ "$IS_M3_ULTRA" == true ]]; then
    echo -e "\n${BOLD}${MAGENTA}Dragon Performance Monitoring:${RESET}"
    echo -e "# In another terminal:"
    echo -e "watch -n 1 'curl -s ${base_url}/health | jq .'"
  fi
}

# Print API examples and the closing banner.
# Only the background modes (2 and 3) leave a server running at this point:
# mode 1 gets here after the foreground server has already exited, and mode 4
# never starts one. The examples and the "ready" message are therefore
# limited to modes 2 and 3.
SERVER_RUNNING=false
case "$LAUNCH_MODE" in
  2 | 3)
    SERVER_RUNNING=true
    print_api_examples "$HOST" "$PORT" "$TEMP"
    CLOSING_MESSAGE="${BOLD}${GREEN}✨ MLX Server ready!${RESET}"
    ;;
  1)
    CLOSING_MESSAGE="${BOLD}${YELLOW}Foreground server has exited.${RESET}"
    ;;
  *)
    CLOSING_MESSAGE="${BOLD}${YELLOW}Server not started - run the copied command to launch it.${RESET}"
    ;;
esac

echo -e "\n${BOLD}${BLUE}=====================================${RESET}"
echo -e "$CLOSING_MESSAGE"
if [[ "$SERVER_RUNNING" == true && "$IS_M3_ULTRA" == true ]]; then
  echo -e "${BOLD}${MAGENTA}🐉 Dragon M3 Ultra serving at full power!${RESET}"
fi
echo -e "${BOLD}${BLUE}=====================================${RESET}"