In [18]:
import os
import shutil
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Paths
DRIVE_MODEL_FOLDER = "/content/drive/MyDrive/Model1Folder"
LOCAL_WORK_FOLDER = "/content/hi"
model_folder = os.path.join(LOCAL_WORK_FOLDER, "hi")

# Create destination folder
os.makedirs(model_folder, exist_ok=True)

# Copy model files if available
if not os.path.exists(DRIVE_MODEL_FOLDER):
    print(f"‚ùå Model folder not found at: {DRIVE_MODEL_FOLDER}")
else:
    print(f"‚úÖ Model folder found at: {DRIVE_MODEL_FOLDER}")
    for filename in os.listdir(DRIVE_MODEL_FOLDER):
        shutil.copy2(os.path.join(DRIVE_MODEL_FOLDER, filename), os.path.join(model_folder, filename))
    print(f"‚úÖ All files copied to: {model_folder}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
‚úÖ Model folder found at: /content/drive/MyDrive/Model1Folder
‚úÖ All files copied to: /content/hi/hi


In [None]:
# Install dependencies
!pip install llama-cpp-python groq --quiet

import os
import re
from groq import Groq
from llama_cpp import Llama

# === Configuration ===
# Replace with your own Groq API key
client = Groq(api_key="")

# Local model path
LOCAL_WORK_FOLDER = "/content/hi/hi"
MODEL_FILENAME = "unsloth.Q4_K_M.gguf"
MODEL_PATH = f"{LOCAL_WORK_FOLDER}/{MODEL_FILENAME}"

# === Load LLaMA model ===
print(f"Loading llama-cpp model from: {MODEL_PATH}")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=8,
    n_gpu_layers=0,
    verbose=False,
)

# === Generate SQL Questions ===
def generate_sql_questions_groq(num=10):
    prompt = f"""
Generate exactly {num} *distinct* SQL questions. Each should consist of:
- SQL table creation
- Some INSERT statements
- A final SQL-related question

üìå Format each as a single block, like:

CREATE TABLE Students (Name VARCHAR(50), Marks INT);
INSERT INTO Students (Name, Marks) VALUES ('Alice', 85), ('Bob', 90);
How do I find the average marks of all students?

Separate each block with two newlines.
Do NOT generate more than {num} questions.
Only output the questions ‚Äî no explanation.
"""
    chat_completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}],
    )
    raw_output = chat_completion.choices[0].message.content.strip()

    # Post-process: limit to `num` questions max
    all_questions = [p.strip() for p in raw_output.split("\n\n") if p.strip()]
    trimmed = all_questions[:num]
    return "\n\n".join(trimmed)


# === Get Answers from LLaMA ===
def get_llama_answers(questions):
    answers = []
    if llm is None:
        print("LLaMA model not loaded. Skipping answer generation.")
        return answers

    for i, message in enumerate(questions):
        print(f"Running LLaMA on question {i+1} / {len(questions)}")
        response = llm.create_chat_completion(
            messages=[message],
            temperature=0.7,
            max_tokens=256,
            stop=["</s>"]
        )
        ans = response["choices"][0]["message"]["content"]
        answers.append({"question": message["content"], "answer": ans})
    return answers

# === Evaluate Answers using Groq ===
def evaluate_answers_groq(qa_pairs):
    evaluations = []
    total_score = 0

    for i, qa in enumerate(qa_pairs):
        print(f"Evaluating answer {i+1} with Groq")
        eval_prompt = f"""
You are an expert SQL tutor.

Question:
{qa['question']}

Answer:
{qa['answer']}

Evaluate the correctness and completeness of the answer.
Rate it 1 to 10 and provide a brief explanation.
"""
        chat_completion = client.chat.completions.create(
            model="gemma2-9b-it",
            messages=[{"role": "user", "content": eval_prompt}],
        )
        evaluation_text = chat_completion.choices[0].message.content.strip()

        # Extract score (1 to 10) using regex
        score_match = re.search(r'\b([1-9]|10)\b', evaluation_text)
        score = int(score_match.group(1)) if score_match else 0
        total_score += score

        evaluations.append({
            "question": qa["question"],
            "answer": qa["answer"],
            "evaluation": evaluation_text,
            "score": score,
            "percentage": round(score * 10, 1)
        })

    avg_score = round(total_score / len(qa_pairs), 2)
    avg_percentage = round(avg_score * 10, 2)

    print(f"\n=== Summary ===")
    print(f"Average Score: {avg_score}/10")
    print(f"Average Percentage: {avg_percentage}%")

    return evaluations

# === Main Pipeline ===
if llm:
    print("Generating SQL questions...")
    questions_text = generate_sql_questions_groq(10)
    questions = parse_questions(questions_text)

    print(f"\nGenerated {len(questions)} questions.\n")

    answers = get_llama_answers(questions)

    if answers:
        evaluations = evaluate_answers_groq(answers)

        print("\n\n=== Detailed Results ===\n")
        for i, res in enumerate(evaluations):
            print(f"--- QA Pair {i+1} ---")
            print("Question:\n", res["question"])
            print("Answer:\n", res["answer"])
            print("Evaluation:\n", res["evaluation"])
            print(f"Score: {res['score']}/10  ({res['percentage']}%)\n\n")
else:
    print("LLaMA model not loaded. Cannot proceed.")


Loading llama-cpp model from: /content/hi/hi/unsloth.Q4_K_M.gguf


llama_context: n_ctx_per_seq (2048) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
llama_kv_cache_unified: LLAMA_SET_ROWS=0, using old ggml_cpy() method for backwards compatibility


Generating SQL questions...

Generated 10 questions.

Running LLaMA on question 1 / 10
Running LLaMA on question 2 / 10
Running LLaMA on question 3 / 10
Running LLaMA on question 4 / 10
Running LLaMA on question 5 / 10
Running LLaMA on question 6 / 10
Running LLaMA on question 7 / 10
Running LLaMA on question 8 / 10
Running LLaMA on question 9 / 10
Running LLaMA on question 10 / 10
Evaluating answer 1 with Groq
Evaluating answer 2 with Groq
Evaluating answer 3 with Groq
Evaluating answer 4 with Groq
Evaluating answer 5 with Groq
Evaluating answer 6 with Groq
Evaluating answer 7 with Groq
Evaluating answer 8 with Groq
Evaluating answer 9 with Groq
Evaluating answer 10 with Groq

=== Summary ===
Average Score: 6.5/10
Average Percentage: 65.0%


=== Detailed Results ===

--- QA Pair 1 ---
Question:
 CREATE TABLE Orders (OrderID INT, CustomerID INT, OrderDate DATE);
INSERT INTO Orders (OrderID, CustomerID, OrderDate) VALUES (1, 101, '2021-01-01'), (2, 102, '2021-01-15');
Answer:
 SELECT * 