In [None]:
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch


In [None]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device set to: {device}")


In [None]:
# Read the CLI question/answer file as UTF-8 JSON.
with open("cli_questions.json", mode="r", encoding="utf-8") as f:
    data = json.load(f)

# The individual entries are stored as a list under the top-level "data" key.
qa_list = data["data"]

# Quick sanity check: number of entries plus the first one.
print(f"Total entries: {len(qa_list)}")
print("Sample entry:", qa_list[0])


In [None]:
# Checkpoint used for the baseline generations below.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Fetch the weights and the matching tokenizer for that checkpoint.
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Place the model on the device chosen in the device-selection cell.
model.to(device)


In [None]:
# Build the text-generation pipeline on the same device the model was moved to.
# `device` is the string "cuda" or "cpu" from the device-selection cell; the
# pipeline's integer convention is 0 for the first GPU and -1 for CPU.
# Hard-coding -1 would request CPU execution even when the model sits on the GPU.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if device == "cuda" else -1,
)


In [None]:
# Collect the question text from the first five dataset entries.
sample_questions = []
for entry in qa_list[:5]:
    sample_questions.append(entry["question"])

# Sampling settings shared by every generation call in this cell.
generation_options = {"max_new_tokens": 150, "do_sample": True, "temperature": 0.7}

# Send each question to the pipeline verbatim and print what comes back.
for i, question in enumerate(sample_questions):
    print(f"Q{i+1}: {question}")
    output = generator(question, **generation_options)
    print(f"A{i+1}: {output[0]['generated_text']}\n{'-'*60}")


In [None]:
# Re-ask one sample question using an explicit "Q: ... A:" prompt layout.
# The question is selected explicitly (the last sample, which is the value the
# sampling loop finishes on) so this cell does not depend on a leaked loop variable.
question = sample_questions[-1]
prompt = f"Q: {question}\nA:"
output = generator(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
print(output[0]["generated_text"])


In [None]:
import json

# Load the dataset. UTF-8 is requested explicitly so decoding does not depend
# on the platform's default encoding.
with open("cli_questions.json", "r", encoding="utf-8") as f:
    raw = json.load(f)
data = raw["data"]  # ensure this matches your JSON structure

# Generate answers
results = []
for i, item in enumerate(data[:50]):  # run on subset first
    question = item["question"]
    prompt = f"Q: {question}\nA:"
    output = generator(prompt, max_new_tokens=150, temperature=0.7, do_sample=True)
    generated = output[0]["generated_text"]

    # By default the text-generation pipeline returns the prompt followed by
    # the continuation. Cut the prompt off by position: splitting on every
    # "A:" picks the wrong piece when the question or the answer itself
    # contains "A:".
    if generated.startswith(prompt):
        answer = generated[len(prompt):].strip()
    elif "A:" in generated:
        # Prompt not echoed verbatim: keep everything after the first marker.
        answer = generated.split("A:", 1)[1].strip()
    else:
        answer = generated

    results.append({"question": question, "answer": answer})
    print(f"Q{i+1}: {question}\nA{i+1}: {answer}\n{'-'*60}")


In [None]:
!pip install transformers datasets peft accelerate bitsandbytes trl --quiet


In [None]:
# Inspect whatever `data` currently holds: its keys when it is a dict,
# otherwise a marker string, followed by a short preview of its contents.
if isinstance(data, dict):
    top_level_keys = data.keys()
else:
    top_level_keys = "Not a dict"
print("Top-level keys:", top_level_keys)
print("Preview:", str(data)[:500])  # only the first 500 characters


In [None]:
import json
from datasets import Dataset

# Load and extract the Q&A list stored under the top-level "data" key.
# UTF-8 is requested explicitly so decoding does not depend on the platform's
# default encoding.
with open("cli_questions.json", "r", encoding="utf-8") as f:
    raw = json.load(f)
data_list = raw["data"]

# Convert to prompt/response format: each record gains "prompt" and
# "response" fields copied from its "question" and "answer".
for sample in data_list:
    sample["prompt"] = sample["question"]
    sample["response"] = sample["answer"]

# Create HuggingFace Dataset and hold out 10% for testing. The seed fixes the
# shuffle so the same split is produced on every run.
dataset = Dataset.from_list(data_list)
dataset = dataset.train_test_split(test_size=0.1, seed=42)

print("Loaded dataset:", dataset)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Imported here because this cell's import line only brings in the tokenizer
# and model classes.
from transformers import BitsAndBytesConfig

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # or try "microsoft/phi-2"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Request 4-bit loading through a quantization config object; passing
# `load_in_4bit=True` directly to from_pretrained is the deprecated spelling.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # For LoRA on low-resource
)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load settings for a CPU-only run.
cpu_load_options = {
    "torch_dtype": "auto",  # or torch.float32 if you get another dtype error
    "device_map": "cpu",  # force CPU since no supported GPU found
}

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, **cpu_load_options)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit quantization settings: NF4 quant type, double quantization enabled,
# float16 as the compute dtype.
quant_settings = {
    "load_in_4bit": True,
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
}
bnb_config = BitsAndBytesConfig(**quant_settings)

# Reload tokenizer and model for `model_name` (set in an earlier cell),
# applying the quantization config and automatic device placement.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=bnb_config, device_map="auto"
)


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Quantization config: 4-bit weights in the "nf4" format with double
# quantization, computing in float16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the checkpoint with the quantization config and automatic device mapping.
model_load_options = {"device_map": "auto", "quantization_config": bnb_config}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_options)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Unquantized load with automatic device placement.
# This will still use CPU if no GPU is found
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")


In [None]:
# Reload `model_name` (set in an earlier cell) with an explicit float32 dtype
# and automatic device placement.
# float16 is an alternative if your CPU supports it.
fp32_load_options = {"device_map": "auto", "torch_dtype": torch.float32}
model = AutoModelForCausalLM.from_pretrained(model_name, **fp32_load_options)


In [None]:
# Start from the CPU and switch to the GPU only when CUDA reports as available.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Device set to: {device}")


In [None]:
import json
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

In [None]:
import json
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

In [None]:
import json

# Read the raw JSON file. UTF-8 is requested explicitly so decoding does not
# depend on the platform's default encoding.
with open("cli_questions.json", encoding="utf-8") as f:
    data = json.load(f)

# Check the top-level structure
print(type(data))  # Should print <class 'dict'>
print(data.keys())  # See what keys are at the top


In [None]:
import json
from datasets import Dataset

# Load the JSON and extract the list. UTF-8 is requested explicitly so
# decoding does not depend on the platform's default encoding.
with open("cli_questions.json", encoding="utf-8") as f:
    raw = json.load(f)

qa_list = raw["data"]  # access the list inside the 'data' key

# Format for instruction tuning: each record becomes a single "text" field
# holding the question and the answer under "### Question:" / "### Answer:"
# headings.
formatted_data = [
    {"text": f"### Question:\n{item['question']}\n\n### Answer:\n{item['answer']}"}
    for item in qa_list
]

# Convert to Hugging Face dataset
dataset = Dataset.from_list(formatted_data)

# Preview
print(f"Loaded {len(dataset)} formatted examples")
print(dataset[0])


In [None]:
from transformers import AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # You can switch to Phi-2 if you prefer

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token for padding (needed for causal LM padding).
tokenizer.pad_token = tokenizer.eos_token


def tokenize(example):
    """Tokenize the "text" field, truncating and padding to 512 tokens."""
    return tokenizer(
        example["text"],
        max_length=512,
        truncation=True,
        padding="max_length",
    )


# Tokenize in batches, then drop the raw "text" column from the result.
tokenized_dataset = dataset.map(tokenize, batched=True).remove_columns(["text"])

# Have the dataset return torch tensors, and show the first example.
tokenized_dataset.set_format(type="torch")
print(tokenized_dataset[0])


In [None]:
# Train on every tokenized example; this is a plain alias, not a copy or a split.
train_dataset = tokenized_dataset


In [None]:
# Use entire dataset as training set
# (train_dataset refers to the same object as tokenized_dataset; nothing is held out here).
train_dataset = tokenized_dataset


In [None]:
# Write the tokenized dataset to the relative path "tokenized_dataset" so it can
# be reloaded with load_from_disk instead of being re-tokenized.
tokenized_dataset.save_to_disk("tokenized_dataset")


In [None]:
from datasets import load_from_disk

# Load the saved dataset
# (reads the relative path "tokenized_dataset" and rebinds `tokenized_dataset` to the result).
tokenized_dataset = load_from_disk("tokenized_dataset")


In [None]:
# Alias the tokenized dataset as the training set; no evaluation split is made here.
train_dataset = tokenized_dataset # Use full set for training since it's only 172 examples


In [None]:
from transformers import DataCollatorForLanguageModeling

# Batch collator for language-model training, built around the current
# tokenizer with masked-language-modelling switched off (mlm=False).
collator_options = {"tokenizer": tokenizer, "mlm": False}
data_collator = DataCollatorForLanguageModeling(**collator_options)


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
from datasets import load_from_disk
import torch

# --- Tokenizer -------------------------------------------------------------
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Important for Trainer padding

# --- LoRA configuration ----------------------------------------------------
# Rank-8 adapters on the q_proj and v_proj modules, alpha 16, dropout 0.1,
# no bias terms, configured for a causal-LM task.
lora_settings = {
    "r": 8,
    "lora_alpha": 16,
    "target_modules": ["q_proj", "v_proj"],
    "lora_dropout": 0.1,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
lora_config = LoraConfig(**lora_settings)

# --- Model -----------------------------------------------------------------
# Load the base checkpoint and wrap it with the LoRA adapters in one step.
model = get_peft_model(AutoModelForCausalLM.from_pretrained(model_name), lora_config)

# --- Data ------------------------------------------------------------------
# Tokenized examples are read back from the "tokenized_dataset" directory.
dataset = load_from_disk("tokenized_dataset")
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

# --- Training arguments ----------------------------------------------------
training_settings = {
    "output_dir": "./lora-tinyllama-output",
    "per_device_train_batch_size": 2,  # Small batch size for CPU
    "gradient_accumulation_steps": 4,
    "num_train_epochs": 1,  # Reduce for quicker runs
    "logging_steps": 10,
    "save_strategy": "epoch",
    "learning_rate": 2e-4,
    "fp16": False,  # Don't use fp16 on CPU
    "report_to": "none",
}
training_args = TrainingArguments(**training_settings)

# --- Trainer ---------------------------------------------------------------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

# Start training
trainer.train()


In [None]:
pip install -r requirements.txt


In [None]:
# Log in to the Hugging Face Hub with a token read from the HF_TOKEN
# environment variable instead of a literal pasted into the notebook.
# The imports are local to this cell so it also runs on a fresh kernel.
import os
from huggingface_hub import login

login(token=os.getenv("HF_TOKEN"))


In [None]:
from huggingface_hub import login
import os

# Authenticate against the Hugging Face Hub with the token held in the
# HF_TOKEN environment variable, so no token text appears in the notebook.
# os.getenv yields None when the variable is unset.
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)
