NLP2425
/

machine_learning

Model card Files and versions

xet

Community

josipabebic committed on Jun 7

Commit

1e3caee

verified ·

1 Parent(s): c784fa7

Delete corrected version.ipynb

Browse files

Files changed (1) hide show

corrected version.ipynb +0 -150

corrected version.ipynb DELETED Viewed

@@ -1,150 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "ca825e7e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Treniranje modela: Logistic regression...\n",
-      "Predikcija na Test 1...\n",
-      "Predikcija na Test 2...\n",
-      "Predikcija na Test 3...\n",
-      "\n",
-      "Treniranje modela: SVM RBF kernel...\n",
-      "Predikcija na Test 1...\n",
-      "Predikcija na Test 2...\n",
-      "Predikcija na Test 3...\n",
-      "\n",
-      "| #      | method           | algorithm                | skup          | Test 1                                                  | Test 2                                                  | Test 3                                                  |\n",
-      "|--------|------------------|--------------------------|--------------|---------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------|\n",
-      "| 1.a.i  | Machine learning | Logistic regression      | Train combined | Precision=0.640, Recall=0.614, F1=0.625, Accuracy=0.614 | Precision=0.632, Recall=0.630, F1=0.626, Accuracy=0.630 | Precision=0.717, Recall=0.691, F1=0.686, Accuracy=0.691 |\n",
-      "| 1.b.i  | Machine learning | SVM RBF kernel           | Train combined | Precision=0.652, Recall=0.632, F1=0.640, Accuracy=0.632 | Precision=0.621, Recall=0.626, F1=0.620, Accuracy=0.626 | Precision=0.764, Recall=0.741, F1=0.735, Accuracy=0.741 |\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.svm import SVC\n",
-    "from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay\n",
-    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
-    "import matplotlib.pyplot as plt\n",
-    "import os\n",
-    "\n",
-    "# Folder za spremanje confusion matrica\n",
-    "os.makedirs('confusion_matrices', exist_ok=True)\n",
-    "\n",
-    "# Definicije datoteka\n",
-    "train_files = ['train-1.csv', 'train-2.csv', 'train-3.csv']\n",
-    "test_files = ['test-1.csv', 'test-2.csv', 'test-3.csv']\n",
-    "test_names = ['Test 1', 'Test 2', 'Test 3']\n",
-    "\n",
-    "text_column = 'Sentence'\n",
-    "target_column = 'Label'\n",
-    "\n",
-    "# Funkcija za učitavanje podataka\n",
-    "def load_data(file):\n",
-    "    df = pd.read_csv(file)\n",
-    "    X_text = df[text_column].astype(str)\n",
-    "    y = df[target_column]\n",
-    "    return X_text, y\n",
-    "\n",
-    "# Funkcija za učitavanje i spajanje više train setova\n",
-    "def load_data_combined(files):\n",
-    "    X_all = []\n",
-    "    y_all = []\n",
-    "    for file in files:\n",
-    "        X_text, y = load_data(file)\n",
-    "        X_all.extend(X_text)\n",
-    "        y_all.extend(y)\n",
-    "    return X_all, y_all\n",
-    "\n",
-    "# Ažurirani modeli\n",
-    "models = [\n",
-    "    ('1.a', 'Machine learning', 'Logistic regression', LogisticRegression(max_iter=5000, solver='liblinear', class_weight='balanced')),\n",
-    "    ('1.b', 'Machine learning', 'SVM RBF kernel', SVC(class_weight='balanced', kernel='rbf', random_state=42))\n",
-    "]\n",
-    "\n",
-    "# Priprema tablice za rezultate\n",
-    "table = []\n",
-    "\n",
-    "# Učitavanje kombiniranih trening podataka\n",
-    "X_train_text, y_train = load_data_combined(train_files)\n",
-    "\n",
-    "# TF-IDF vektorizacija s proširenim parametrima\n",
-    "vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 3))\n",
-    "X_train = vectorizer.fit_transform(X_train_text)\n",
-    "\n",
-    "# Treniranje i evaluacija\n",
-    "for code, method, algorithm, model in models:\n",
-    "    print(f\"\\nTreniranje modela: {algorithm}...\")\n",
-    "    model.fit(X_train, y_train)\n",
-    "    \n",
-    "    row_train = [f\"{code}.i\", method, algorithm, \"Train combined\"]\n",
-    "    for idx, test_file in enumerate(test_files):\n",
-    "        print(f\"Predikcija na {test_names[idx]}...\")\n",
-    "        X_test_text, y_test = load_data(test_file)\n",
-    "        X_test = vectorizer.transform(X_test_text)\n",
-    "        \n",
-    "        y_pred = model.predict(X_test)\n",
-    "        precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)\n",
-    "        recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)\n",
-    "        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)\n",
-    "        accuracy = accuracy_score(y_test, y_pred)\n",
-    "        metric_str = f\"Precision={precision:.3f}, Recall={recall:.3f}, F1={f1:.3f}, Accuracy={accuracy:.3f}\"\n",
-    "        row_train.append(metric_str)\n",
-    "        \n",
-    "        # Confusion matrix\n",
-    "        cm = confusion_matrix(y_test, y_pred)\n",
-    "        disp = ConfusionMatrixDisplay(confusion_matrix=cm)\n",
-    "        disp.plot(cmap=plt.cm.Blues)\n",
-    "        plt.title(f'Confusion Matrix: {algorithm}\\nTrain: Combined Train Test: {test_names[idx]}')\n",
-    "        plt.savefig(f'confusion_matrices/cm_{algorithm.replace(\" \", \"_\")}_TrainCombined_{test_names[idx].replace(\" \", \"\")}.png')\n",
-    "        plt.close()\n",
-    "    table.append(row_train)\n",
-    "\n",
-    "# Ispis tablice u markdown formatu\n",
-    "header = \"| #      | method           | algorithm                | skup          | Test 1                                                  | Test 2                                                  | Test 3                                                  |\"\n",
-    "sep =    \"|--------|------------------|--------------------------|--------------|---------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------|\"\n",
-    "print(\"\\n\" + header)\n",
-    "print(sep)\n",
-    "for row in table:\n",
-    "    print(f\"| {row[0]:<6} | {row[1]:<16} | {row[2]:<24} | {row[3]:<12} | {row[4]:<55} | {row[5]:<55} | {row[6]:<55} |\")\n",
-    "\n",
-    "# Spremi rezultate u .md datoteku\n",
-    "with open('results_group2.md', 'w', encoding='utf-8') as f:\n",
-    "    f.write(header + \"\\n\")\n",
-    "    f.write(sep + \"\\n\")\n",
-    "    for row in table:\n",
-    "        f.write(f\"| {row[0]:<6} | {row[1]:<16} | {row[2]:<24} | {row[3]:<12} | {row[4]:<55} | {row[5]:<55} | {row[6]:<55} |\\n\")\n"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}