#!/usr/bin/env python3
"""
CTransformers usage example for Isaac Sim Robotics Qwen model.

This script demonstrates how to use the model with CTransformers
for lightweight, memory-efficient inference.
"""

import argparse
import sys

from ctransformers import AutoModelForCausalLM

# Marker that opens the assistant turn in the ChatML prompt built below.
_ASSISTANT_MARKER = "<|im_start|>assistant"
# Generation stops as soon as the model starts closing or opening a turn.
_STOP_TOKENS = ["<|im_end|>", "<|im_start|>"]
# Inputs (compared case-insensitively) that leave interactive mode.
_EXIT_COMMANDS = frozenset({"quit", "exit", "q"})


def load_model(model_path, model_type="qwen2", gpu_layers=0, lib=None):
    """
    Load the Isaac Sim Robotics Qwen model using CTransformers.

    On any loading failure an error message is printed and the process
    exits with status 1.

    Args:
        model_path (str): Path to the CTransformers model
        model_type (str): Model architecture type
        gpu_layers (int): Number of layers to offload to GPU (0 = CPU only)
        lib (str | None): Backend library name or path forwarded to
            CTransformers. None (the default) lets CTransformers pick the
            backend itself.

    Returns:
        AutoModelForCausalLM: Loaded model
    """
    print(f"Loading CTransformers model from: {model_path}")
    print(f"Model type: {model_type}, GPU layers: {gpu_layers}")

    try:
        # NOTE: the backend is no longer pinned to "avx2". Pinning it
        # overrides the library's own backend selection, which is what
        # allows gpu_layers > 0 to take effect and keeps the script usable
        # on CPUs without AVX2.
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            model_type=model_type,
            gpu_layers=gpu_layers,
            lib=lib,
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        print("Make sure you have the CTransformers model files in the specified directory.")
        sys.exit(1)

    print("Model loaded successfully!")
    return model


def generate_response(model, query, max_length=1024, temperature=0.7):
    """
    Generate a response using the CTransformers model.

    Args:
        model: The loaded CTransformers model (called directly with the prompt)
        query (str): The input query
        max_length (int): Maximum number of new tokens to generate
        temperature (float): Sampling temperature

    Returns:
        str: Generated response with surrounding whitespace removed
    """
    # ChatML prompt: one user turn followed by an opened assistant turn.
    # The trailing newline after "assistant" matches the ChatML turn layout
    # used for the user turn above.
    formatted_query = (
        f"<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n"
    )

    # No `do_sample` argument here: it is a Hugging Face `generate()` option
    # that the CTransformers call does not accept. Sampling is controlled
    # through `temperature`.
    response = model(
        formatted_query,
        max_new_tokens=max_length,
        temperature=temperature,
        stop=_STOP_TOKENS,
    )

    # Defensive: if the output still echoes the assistant marker, keep only
    # the text that follows it.
    if _ASSISTANT_MARKER in response:
        response = response.split(_ASSISTANT_MARKER)[1]

    return response.strip()


def _build_parser():
    """Create the command-line argument parser for this script."""
    parser = argparse.ArgumentParser(
        description="Isaac Sim Robotics Qwen CTransformers Inference"
    )
    parser.add_argument(
        "--model_path",
        type=str,
        default="models/ctransformers",
        help="Path to CTransformers model directory",
    )
    parser.add_argument(
        "--model_type",
        type=str,
        default="qwen2",
        help="Model architecture type",
    )
    parser.add_argument(
        "--gpu_layers",
        type=int,
        default=0,
        help="Number of layers to offload to GPU (0 = CPU only)",
    )
    parser.add_argument(
        "--max_length",
        type=int,
        default=1024,
        help="Maximum length of generated response",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.7,
        help="Sampling temperature",
    )
    parser.add_argument(
        "--query",
        type=str,
        help="Query to ask (if not provided, will use interactive mode)",
    )
    return parser


def _run_interactive(model, max_length, temperature):
    """Read questions from stdin and print answers until the user exits."""
    print("\n=== Isaac Sim Robotics Qwen CTransformers Interactive Mode ===")
    print("Type 'quit' to exit")
    print("Example queries:")
    print("- How do I create a differential drive robot in Isaac Sim?")
    print("- How to add a depth camera to my robot?")
    print("- What physics parameters should I use for a manipulator?")
    print()

    while True:
        try:
            query = input("Enter your Isaac Sim question: ").strip()

            if query.lower() in _EXIT_COMMANDS:
                break

            if not query:
                continue

            print("Generating response...")
            response = generate_response(model, query, max_length, temperature)
            print(f"\nResponse:\n{response}\n")

        except (KeyboardInterrupt, EOFError):
            # EOFError is raised by input() once stdin is exhausted (Ctrl-D
            # or piped input). Treat it like Ctrl-C; otherwise the generic
            # handler below would report it and retry forever.
            print("\nExiting...")
            break
        except Exception as e:
            # Keep the session alive after a failed generation.
            print(f"Error generating response: {e}")


def main():
    """Parse arguments, load the model, then answer one query or go interactive."""
    args = _build_parser().parse_args()

    try:
        # Load model
        model = load_model(
            args.model_path,
            model_type=args.model_type,
            gpu_layers=args.gpu_layers,
        )

        if args.query:
            # Single query mode
            response = generate_response(
                model,
                args.query,
                args.max_length,
                args.temperature,
            )
            print(f"\nQuery: {args.query}")
            print(f"Response:\n{response}")
        else:
            # Interactive mode
            _run_interactive(model, args.max_length, args.temperature)

    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()