josipabebic committed on
Commit
1e3caee
·
verified ·
1 Parent(s): c784fa7

Delete corrected version.ipynb

Browse files
Files changed (1) hide show
  1. corrected version.ipynb +0 -150
corrected version.ipynb DELETED
@@ -1,150 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "ca825e7e",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stdout",
11
- "output_type": "stream",
12
- "text": [
13
- "\n",
14
- "Treniranje modela: Logistic regression...\n",
15
- "Predikcija na Test 1...\n",
16
- "Predikcija na Test 2...\n",
17
- "Predikcija na Test 3...\n",
18
- "\n",
19
- "Treniranje modela: SVM RBF kernel...\n",
20
- "Predikcija na Test 1...\n",
21
- "Predikcija na Test 2...\n",
22
- "Predikcija na Test 3...\n",
23
- "\n",
24
- "| # | method | algorithm | skup | Test 1 | Test 2 | Test 3 |\n",
25
- "|--------|------------------|--------------------------|--------------|---------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------|\n",
26
- "| 1.a.i | Machine learning | Logistic regression | Train combined | Precision=0.640, Recall=0.614, F1=0.625, Accuracy=0.614 | Precision=0.632, Recall=0.630, F1=0.626, Accuracy=0.630 | Precision=0.717, Recall=0.691, F1=0.686, Accuracy=0.691 |\n",
27
- "| 1.b.i | Machine learning | SVM RBF kernel | Train combined | Precision=0.652, Recall=0.632, F1=0.640, Accuracy=0.632 | Precision=0.621, Recall=0.626, F1=0.620, Accuracy=0.626 | Precision=0.764, Recall=0.741, F1=0.735, Accuracy=0.741 |\n"
28
- ]
29
- }
30
- ],
31
- "source": [
32
- "import pandas as pd\n",
33
- "from sklearn.linear_model import LogisticRegression\n",
34
- "from sklearn.svm import SVC\n",
35
- "from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay\n",
36
- "from sklearn.feature_extraction.text import TfidfVectorizer\n",
37
- "import matplotlib.pyplot as plt\n",
38
- "import os\n",
39
- "\n",
40
- "# Folder za spremanje confusion matrica\n",
41
- "os.makedirs('confusion_matrices', exist_ok=True)\n",
42
- "\n",
43
- "# Definicije datoteka\n",
44
- "train_files = ['train-1.csv', 'train-2.csv', 'train-3.csv']\n",
45
- "test_files = ['test-1.csv', 'test-2.csv', 'test-3.csv']\n",
46
- "test_names = ['Test 1', 'Test 2', 'Test 3']\n",
47
- "\n",
48
- "text_column = 'Sentence'\n",
49
- "target_column = 'Label'\n",
50
- "\n",
51
- "# Funkcija za učitavanje podataka\n",
52
- "def load_data(file):\n",
53
- " df = pd.read_csv(file)\n",
54
- " X_text = df[text_column].astype(str)\n",
55
- " y = df[target_column]\n",
56
- " return X_text, y\n",
57
- "\n",
58
- "# Funkcija za učitavanje i spajanje više train setova\n",
59
- "def load_data_combined(files):\n",
60
- " X_all = []\n",
61
- " y_all = []\n",
62
- " for file in files:\n",
63
- " X_text, y = load_data(file)\n",
64
- " X_all.extend(X_text)\n",
65
- " y_all.extend(y)\n",
66
- " return X_all, y_all\n",
67
- "\n",
68
- "# Ažurirani modeli\n",
69
- "models = [\n",
70
- " ('1.a', 'Machine learning', 'Logistic regression', LogisticRegression(max_iter=5000, solver='liblinear', class_weight='balanced')),\n",
71
- " ('1.b', 'Machine learning', 'SVM RBF kernel', SVC(class_weight='balanced', kernel='rbf', random_state=42))\n",
72
- "]\n",
73
- "\n",
74
- "# Priprema tablice za rezultate\n",
75
- "table = []\n",
76
- "\n",
77
- "# Učitavanje kombiniranih trening podataka\n",
78
- "X_train_text, y_train = load_data_combined(train_files)\n",
79
- "\n",
80
- "# TF-IDF vektorizacija s proširenim parametrima\n",
81
- "vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 3))\n",
82
- "X_train = vectorizer.fit_transform(X_train_text)\n",
83
- "\n",
84
- "# Treniranje i evaluacija\n",
85
- "for code, method, algorithm, model in models:\n",
86
- " print(f\"\\nTreniranje modela: {algorithm}...\")\n",
87
- " model.fit(X_train, y_train)\n",
88
- " \n",
89
- " row_train = [f\"{code}.i\", method, algorithm, \"Train combined\"]\n",
90
- " for idx, test_file in enumerate(test_files):\n",
91
- " print(f\"Predikcija na {test_names[idx]}...\")\n",
92
- " X_test_text, y_test = load_data(test_file)\n",
93
- " X_test = vectorizer.transform(X_test_text)\n",
94
- " \n",
95
- " y_pred = model.predict(X_test)\n",
96
- " precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)\n",
97
- " recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)\n",
98
- " f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)\n",
99
- " accuracy = accuracy_score(y_test, y_pred)\n",
100
- " metric_str = f\"Precision={precision:.3f}, Recall={recall:.3f}, F1={f1:.3f}, Accuracy={accuracy:.3f}\"\n",
101
- " row_train.append(metric_str)\n",
102
- " \n",
103
- " # Confusion matrix\n",
104
- " cm = confusion_matrix(y_test, y_pred)\n",
105
- " disp = ConfusionMatrixDisplay(confusion_matrix=cm)\n",
106
- " disp.plot(cmap=plt.cm.Blues)\n",
107
- " plt.title(f'Confusion Matrix: {algorithm}\\nTrain: Combined Train Test: {test_names[idx]}')\n",
108
- " plt.savefig(f'confusion_matrices/cm_{algorithm.replace(\" \", \"_\")}_TrainCombined_{test_names[idx].replace(\" \", \"\")}.png')\n",
109
- " plt.close()\n",
110
- " table.append(row_train)\n",
111
- "\n",
112
- "# Ispis tablice u markdown formatu\n",
113
- "header = \"| # | method | algorithm | skup | Test 1 | Test 2 | Test 3 |\"\n",
114
- "sep = \"|--------|------------------|--------------------------|--------------|---------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------|\"\n",
115
- "print(\"\\n\" + header)\n",
116
- "print(sep)\n",
117
- "for row in table:\n",
118
- " print(f\"| {row[0]:<6} | {row[1]:<16} | {row[2]:<24} | {row[3]:<12} | {row[4]:<55} | {row[5]:<55} | {row[6]:<55} |\")\n",
119
- "\n",
120
- "# Spremi rezultate u .md datoteku\n",
121
- "with open('results_group2.md', 'w', encoding='utf-8') as f:\n",
122
- " f.write(header + \"\\n\")\n",
123
- " f.write(sep + \"\\n\")\n",
124
- " for row in table:\n",
125
- " f.write(f\"| {row[0]:<6} | {row[1]:<16} | {row[2]:<24} | {row[3]:<12} | {row[4]:<55} | {row[5]:<55} | {row[6]:<55} |\\n\")\n"
126
- ]
127
- }
128
- ],
129
- "metadata": {
130
- "kernelspec": {
131
- "display_name": "Python 3",
132
- "language": "python",
133
- "name": "python3"
134
- },
135
- "language_info": {
136
- "codemirror_mode": {
137
- "name": "ipython",
138
- "version": 3
139
- },
140
- "file_extension": ".py",
141
- "mimetype": "text/x-python",
142
- "name": "python",
143
- "nbconvert_exporter": "python",
144
- "pygments_lexer": "ipython3",
145
- "version": "3.9.6"
146
- }
147
- },
148
- "nbformat": 4,
149
- "nbformat_minor": 5
150
- }