File size: 5,241 Bytes
692d5c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
#!/usr/bin/env python3
"""
CTransformers usage example for Isaac Sim Robotics Qwen model.
This script demonstrates how to use the model with CTransformers
for lightweight, memory-efficient inference.
"""
from ctransformers import AutoModelForCausalLM
import argparse
import sys
def load_model(model_path, model_type="qwen2", gpu_layers=0, lib=None):
    """
    Load the Isaac Sim Robotics Qwen model using CTransformers.

    Args:
        model_path (str): Path to the CTransformers model
        model_type (str): Model architecture type
        gpu_layers (int): Number of layers to offload to GPU (0 = CPU only)
        lib (str | None): Optional shared-library selector forwarded to
            CTransformers (per its documentation: a path to a shared
            library, or one of "avx2", "avx", "basic"). The default of
            None leaves the choice to CTransformers.

    Returns:
        The model object returned by AutoModelForCausalLM.from_pretrained.

    Note:
        On any loading error this prints a diagnostic and terminates the
        process with exit status 1 instead of raising.
    """
    print(f"Loading CTransformers model from: {model_path}")
    print(f"Model type: {model_type}, GPU layers: {gpu_layers}")
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            model_type=model_type,
            gpu_layers=gpu_layers,
            # Do not pin "avx2" here: forcing a CPU build breaks machines
            # without AVX2 and prevents a GPU-enabled build from being
            # chosen when gpu_layers > 0. Callers can still force a
            # specific build through the `lib` argument.
            lib=lib,
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        print("Make sure you have the CTransformers model files in the specified directory.")
        sys.exit(1)
    print("Model loaded successfully!")
    return model
def generate_response(model, query, max_length=1024, temperature=0.7):
    """
    Generate a response using the CTransformers model.

    Args:
        model: The loaded CTransformers model (called as
            ``model(prompt, max_new_tokens=..., temperature=..., stop=...)``)
        query (str): The input query
        max_length (int): Maximum number of new tokens to generate
        temperature (float): Sampling temperature

    Returns:
        str: Generated response. If the returned text contains the assistant
        header, only the stripped text following the first header is kept;
        otherwise the model output is returned unchanged.
    """
    assistant_header = "<|im_start|>assistant"

    # ChatML prompt for Qwen2.5-Coder. The assistant header is followed by a
    # newline, matching the chat template, so generation starts directly
    # with the answer text.
    formatted_query = (
        f"<|im_start|>user\n{query}<|im_end|>\n{assistant_header}\n"
    )

    # Only arguments accepted by the CTransformers call are passed here;
    # `do_sample` is a Hugging Face `generate()` option and is rejected as an
    # unexpected keyword argument.
    response = model(
        formatted_query,
        max_new_tokens=max_length,
        temperature=temperature,
        stop=["<|im_end|>", "<|im_start|>"],
    )

    # Defensive: if the prompt is echoed back, keep only the assistant part.
    if assistant_header in response:
        response = response.split(assistant_header)[1].strip()
    return response
def main():
    """
    Command-line entry point.

    Parses the CLI options, loads the model, and then either answers the
    single ``--query`` and returns, or starts an interactive prompt loop.
    The loop ends on 'quit' / 'exit' / 'q', on Ctrl-C, or when standard
    input reaches end-of-file. Any other error outside the loop is printed
    and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Isaac Sim Robotics Qwen CTransformers Inference")
    parser.add_argument(
        "--model_path",
        type=str,
        default="models/ctransformers",
        help="Path to CTransformers model directory"
    )
    parser.add_argument(
        "--model_type",
        type=str,
        default="qwen2",
        help="Model architecture type"
    )
    parser.add_argument(
        "--gpu_layers",
        type=int,
        default=0,
        help="Number of layers to offload to GPU (0 = CPU only)"
    )
    parser.add_argument(
        "--max_length",
        type=int,
        default=1024,
        help="Maximum length of generated response"
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.7,
        help="Sampling temperature"
    )
    parser.add_argument(
        "--query",
        type=str,
        help="Query to ask (if not provided, will use interactive mode)"
    )
    args = parser.parse_args()

    try:
        # Load model
        model = load_model(
            args.model_path,
            model_type=args.model_type,
            gpu_layers=args.gpu_layers
        )

        if args.query:
            # Single query mode
            response = generate_response(
                model, args.query, args.max_length, args.temperature
            )
            print(f"\nQuery: {args.query}")
            print(f"Response:\n{response}")
        else:
            # Interactive mode
            print("\n=== Isaac Sim Robotics Qwen CTransformers Interactive Mode ===")
            print("Type 'quit' to exit")
            print("Example queries:")
            print("- How do I create a differential drive robot in Isaac Sim?")
            print("- How to add a depth camera to my robot?")
            print("- What physics parameters should I use for a manipulator?")
            print()

            while True:
                try:
                    query = input("Enter your Isaac Sim question: ").strip()
                    if query.lower() in ['quit', 'exit', 'q']:
                        break
                    if not query:
                        continue
                    print("Generating response...")
                    response = generate_response(
                        model, query, args.max_length, args.temperature
                    )
                    print(f"\nResponse:\n{response}\n")
                except (KeyboardInterrupt, EOFError):
                    # EOFError (Ctrl-D or exhausted piped stdin) must end the
                    # session: input() would raise it again on every retry,
                    # so handling it as a generic error loops forever.
                    print("\nExiting...")
                    break
                except Exception as e:
                    # A failed generation should not end the session.
                    print(f"Error generating response: {e}")
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()