Remove figures

2023-12-05 23:50:32 +02:00 · 2023-12-05 23:50:32 +02:00 · a0dd07b94a
commit a0dd07b94a
parent 61313c1e87
8 changed files with 747 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,2 @@
 Lab1/Figures/
+Lab2/Figures/
--- a/Lab2/5.2/part1.py
+++ b/Lab2/5.2/part1.py
@ -0,0 +1,118 @@
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
+from sklearn.metrics import accuracy_score, roc_curve, confusion_matrix
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import LabelEncoder
+from sklearn.model_selection import cross_val_score
+
+# Load the Spotify music data
+MusicSpotify = pd.read_csv('music_spotify.csv')
+MusicSpotify = MusicSpotify.drop(columns=['X', 'artist', 'song_title'])
+#print(MusicSpotify.head())
+
+label_encoder = LabelEncoder()
+# fit_transform returns the encoded labels instead of modifying the column in place, so assign it back.
+MusicSpotify['target'] = label_encoder.fit_transform(MusicSpotify['target'])
+
+# Data division
+X = MusicSpotify.drop(columns=['target'])  # features: every column except the class label 'target'
+y = MusicSpotify['target']
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
+
+# Tree training (no cost complexity pruning)
+tree_music_spotify = DecisionTreeClassifier(ccp_alpha=0.00, min_samples_split=2)
+tree_music_spotify.fit(X_train, y_train)
+
+pred_rbf = tree_music_spotify.predict(X_test)
+
+
+if False:
+    # Plot the Decision Tree with matplotlib
+    feature_names = X_train.columns.tolist()  # Assuming X_train is a DataFrame
+    class_names = y_train.unique().astype(str)
+    plt.figure(figsize=(12, 8))
+    plot_tree(tree_music_spotify, filled=True, feature_names=feature_names, class_names=class_names, rounded=True, fontsize=10)
+    plt.show()
+
+# Tree information and visualization
+path = tree_music_spotify.cost_complexity_pruning_path(X_train, y_train)
+ccp_alphas, impurities = path.ccp_alphas, path.impurities
+#path = tree_music_spotify.cost_complexity_pruning_path(X_train, y_train)
+if False:
+    fig, ax = plt.subplots()
+    ax.plot(ccp_alphas[:-1], impurities[:-1], marker="o", drawstyle="steps-post")
+    ax.set_xlabel("effective alpha")
+    ax.set_ylabel("total impurity of leaves")
+    ax.set_title("Total Impurity vs effective alpha for training set")
+    plt.grid()
+    plt.show()
+
+if False:
+   # Vary the hyperparameter (e.g., max depth)
+    depth_values = range(1, 25)
+    cv_scores = []
+
+    for depth in depth_values:
+        dt_classifier = DecisionTreeClassifier(max_depth=depth, random_state=20)
+        scores = cross_val_score(dt_classifier, X, y, cv=10, scoring='accuracy')
+        cv_scores.append(np.mean(scores))
+
+    # Plot the cross-validated error
+    plt.plot(depth_values, 1 - np.array(cv_scores), marker='o')
+    plt.xlabel('Tree depth')
+    plt.ylabel('Cross-validated error rate')
+    plt.title('Cross-validated error vs. tree depth')
+    plt.grid(True)
+    plt.show()
+
+if False:
+    ccp_alphas_collect=[]
+    accuracy_collect=[]
+    for ccp_alpha in ccp_alphas:
+        tree = DecisionTreeClassifier(ccp_alpha=ccp_alpha)
+        tree.fit(X_train, y_train)
+        accuracy = tree.score(X_test, y_test)
+        ccp_alphas_collect.append(ccp_alpha)
+        accuracy_collect.append(accuracy)
+
+    plt.plot(np.array(ccp_alphas_collect),np.array(accuracy_collect))
+    plt.grid()
+    plt.xlim(0, 0.03)
+    plt.xlabel('CP alpha')
+    plt.ylabel('Accuracy')
+    plt.show()
+
+if False:
+    plt.figure(figsize=(12, 6))
+    plot_tree(tree_music_spotify, filled=True, feature_names=X.columns.tolist())
+    plt.show()
+
+# Prediction and accuracy
+pred = tree_music_spotify.predict(X_test)
+accuracy = accuracy_score(y_test, pred) * 100
+print("Unprunned:")
+print(f"Accuracy: {accuracy:.2f}%")
+print("10-fold cross-validation score: ", cross_val_score(tree_music_spotify, X, y, cv=10).mean())
+print("Confusion Matrix:\n", confusion_matrix(pred, y_test))
+
+# Pruning
+music_spotify_prunned = DecisionTreeClassifier(ccp_alpha=0.006, random_state=20)
+music_spotify_prunned.fit(X_train, y_train)
+pred = music_spotify_prunned.predict(X_test)
+print("Prunned:")
+print(f"Accuracy: {accuracy_score(y_test, pred) * 100:.2f}%")
+print("10-fold cross-validation score: ", cross_val_score(music_spotify_prunned, X, y, cv=10).mean())
+print("Confusion Matrix:\n", confusion_matrix(pred, y_test))
+
+if False:
+    plt.figure(figsize=(12, 6))
+    plot_tree(music_spotify_prunned, filled=True, feature_names=X.columns.tolist())
+    plt.show()
+
+if True:
+    feat_importances = pd.DataFrame(tree_music_spotify.feature_importances_, index=X_train.columns, columns=["Importance"])
+    feat_importances.sort_values(by='Importance', ascending=False, inplace=True)
+    feat_importances.plot(kind='bar', figsize=(8,6))
+    plt.show()
--- a/Lab2/5.2/part2.py
+++ b/Lab2/5.2/part2.py
@ -0,0 +1,107 @@
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
+from sklearn.metrics import accuracy_score
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import LabelEncoder
+from sklearn.model_selection import cross_val_score
+from time import time
+
+print("Loading data")
+train_df = pd.read_csv("sign_mnist_train.csv")
+test_df = pd.read_csv("sign_mnist_test.csv")
+
+def remove_every_nth_column(df, count):
+    """Return df without the columns whose 1-based position is a multiple of count."""
+    positions = np.arange(1, len(df.columns) + 1)
+    keep_mask = positions % count != 0
+    return df.loc[:, keep_mask]
+
+def leave_every_nth_column(df, count):
+    """Return only the columns of df whose 1-based position is a multiple of count."""
+    positions = np.arange(1, len(df.columns) + 1)
+    keep_mask = positions % count == 0
+    return df.loc[:, keep_mask]
+
+X_train, X_test = train_df.drop(columns=["label"]), test_df.drop(columns=["label"])
+y_train, y_test = train_df["label"], test_df["label"]
+
+# # ('leave_every_nth_column_5', 0.6002509760178472)
+# X_train = leave_every_nth_column(X_train, 5)
+# X_test = leave_every_nth_column(X_test, 5)
+
+# # ('remove_every_nth_column_15', 0.6167038482989403)
+# X_train = remove_every_nth_column(X_train, 15)
+# X_test = remove_every_nth_column(X_test, 15)
+
+# # ('remove_every_nth_column_14', 0.6179587283881762)
+# X_train = remove_every_nth_column(X_train, 14)
+# X_test = remove_every_nth_column(X_test, 14)
+
+# # ('remove_every_nth_column_11', 0.6183770217512549)
+# X_train = remove_every_nth_column(X_train, 11)
+# X_test = remove_every_nth_column(X_test, 11)
+
+start = time()
+ccp_alpha = 0.0009
+print("Training", ccp_alpha)
+decision_tree = DecisionTreeClassifier(ccp_alpha=ccp_alpha)
+decision_tree.fit(X_train, y_train)
+print(time() - start)
+
+if False:
+    print("Viz")
+    # Candidate alphas come from the cost-complexity pruning path on the training
+    # data; without this line `ccp_alphas` is undefined in this script.
+    ccp_alphas = decision_tree.cost_complexity_pruning_path(X_train, y_train).ccp_alphas
+    ccp_alphas_collect=[]
+    accuracy_collect=[]
+    # Use a separate loop name so the module-level `ccp_alpha` setting is not overwritten.
+    for alpha in ccp_alphas:
+        tree = DecisionTreeClassifier(ccp_alpha=alpha)
+        tree.fit(X_train, y_train)
+        accuracy = tree.score(X_test, y_test)
+        ccp_alphas_collect.append(alpha)
+        accuracy_collect.append(accuracy)
+
+    plt.plot(np.array(ccp_alphas_collect),np.array(accuracy_collect))
+    plt.grid()
+    plt.xlabel('effective alpha')
+    plt.ylabel('Accuracy of test set')
+    plt.show()
+
+pred = decision_tree.predict(X_test)
+accuracy = accuracy_score(y_test, pred) * 100
+print("Unprunned:")
+print(f"Accuracy: {accuracy:.2f}%")
+#print("10-fold cross-validation score: ", cross_val_score(decision_tree, X_test, y_test, cv=10).mean())
+
+if False:
+    plt.figure(figsize=(12, 6))
+    plot_tree(decision_tree, filled=True, feature_names=X_test.columns.tolist())
+    plt.show()
+
+if False:
+    # Pruning
+    decision_tree_prunned = DecisionTreeClassifier(ccp_alpha=0.01)
+    decision_tree_prunned.fit(X_train, y_train)
+    print("Prunned:")
+    # Score against the true test labels, not against the other tree's predictions.
+    print(f"Accuracy: {accuracy_score(y_test, decision_tree_prunned.predict(X_test)) * 100:.2f}%")
+    print("10-fold cross-validation score: ", cross_val_score(decision_tree_prunned, X_test, y_test, cv=10).mean())
+
+if True:
+    if True:
+        feat_importances = pd.DataFrame(decision_tree.feature_importances_, index=X_train.columns, columns=["Importance"])
+        print(decision_tree.feature_importances_)
+        print(len(decision_tree.feature_importances_))
+        print(X_train.columns)
+
+        image = []
+        for y in range(28):
+            image.append([0]*28)
+
+
+        for idx, importance in enumerate(decision_tree.feature_importances_):
+            pixel_idx = int(X_train.columns[idx].removeprefix("pixel"))-1
+            pixel_x = pixel_idx % 28
+            pixel_y = pixel_idx // 28
+            image[pixel_y][pixel_x] = importance
+
+        for y in range(28):
+            print(",".join(str(a) for a in image[y]))
+
+    # feat_importances = pd.DataFrame(decision_tree.feature_importances_, index=X_train.columns, columns=["Importance"])
+    # feat_importances.sort_values(by='Importance', ascending=False, inplace=True)
+    # feat_importances.plot(kind='bar', figsize=(8,6))
+    # plt.show()
--- a/Lab2/6.1/example.py
+++ b/Lab2/6.1/example.py
@ -0,0 +1,151 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn import svm
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
+from sklearn.metrics import confusion_matrix
+
+# 1. Linear SVC
+np.random.seed(1)
+x = np.random.normal(size=(20, 2))
+y = np.concatenate([-np.ones(10), np.ones(10)])
+x[y == 1, :] = x[y == 1, :] + 1
+# plt.scatter(x[:, 0], x[:, 1], c=(3 - y), marker='o', edgecolors='k')
+# plt.grid(True)
+# plt.show()
+
+# 2. Linear SVC fitting
+data = pd.DataFrame({'x1': x[:, 0], 'x2': x[:, 1], 'y': y})
+svm_model = svm.SVC(kernel='linear', C=10)
+svm_model.fit(data[['x1', 'x2']], data['y'])
+
+# 3. Visualizing
+if False:
+    xx, yy = np.meshgrid(np.linspace(-2.1, 2.5, 100), np.linspace(-1.3, 2.6, 100))
+    Z = svm_model.decision_function(np.c_[xx.ravel(), yy.ravel()])
+    Z = Z.reshape(xx.shape)
+
+    plt.contourf(xx, yy, Z, levels=[-1, 0, 1], cmap=plt.cm.terrain, alpha=0.5)
+    plt.scatter(x[:, 0], x[:, 1], c=(3 - y), marker='o', edgecolors='k')
+    plt.xlim(-2.1, 2.5)
+    plt.ylim(-1.3, 2.6)
+    plt.grid(True)
+    plt.show()
+
+# 4. Indices of Support vectors
+print("Indices of Support Vectors:", svm_model.support_)
+
+# 6. Controlling the cost=1/C hyperparameter
+svm_model_low_cost = svm.SVC(kernel='linear', C=100)
+svm_model_low_cost.fit(data[['x1', 'x2']], data['y'])
+
+if False:
+    xx, yy = np.meshgrid(np.linspace(-5,5, 100), np.linspace(-5,5, 100))
+    Z = svm_model_low_cost.decision_function(np.c_[xx.ravel(), yy.ravel()])
+    Z = Z.reshape(xx.shape)
+
+    plt.contourf(xx, yy, Z, levels=[-1, 0, 1], cmap=plt.cm.terrain, alpha=0.5)
+    plt.scatter(x[:, 0], x[:, 1], c=(3 - y), marker='o', edgecolors='k')
+    plt.xlim(-2.1, 2.5)
+    plt.ylim(-1.3, 2.6)
+    plt.grid(True)
+    plt.show()
+
+print("Indices of Support Vectors (Low Cost):", svm_model_low_cost.support_)
+print("Model Summary (Low Cost):\n", svm_model_low_cost)
+
+# 7. Which cost value is better?
+np.random.seed(1)
+tune_params = {'C': [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1, 2, 3, 4]}
+svm_tune = GridSearchCV(svm.SVC(kernel='linear'), tune_params, cv=5)
+svm_tune.fit(data[['x1', 'x2']], data['y'])
+
+# 8. Using the best model
+best_model = svm_tune.best_estimator_
+print("Best Model:\n", best_model)
+
+# 9. Testing your model
+x_test = np.random.normal(size=(20, 2))
+y_test = np.random.choice([-1, 1], size=20, replace=True)
+x_test[y_test == 1, :] = x_test[y_test == 1, :] + 1
+test_data = pd.DataFrame({'x1': x_test[:, 0], 'x2': x_test[:, 1], 'y': y_test})
+
+y_pred = best_model.predict(test_data[['x1', 'x2']])
+print("Confusion Matrix:\n", confusion_matrix(y_pred, test_data['y']))
+
+# 10. Is linear boundary always the best choice?
+np.random.seed(1)
+x = np.random.normal(size=(200, 2))
+x[:100, :] = x[:100, :] + 2
+x[100:150, :] = x[100:150, :] - 2
+y = np.concatenate([-np.ones(150), np.ones(50)])
+data = pd.DataFrame({'x1': x[:, 0], 'x2': x[:, 1], 'y': y})
+if False:
+    plt.scatter(x[:, 0], x[:, 1], c=(3 - y), marker='o', edgecolors='k')
+    plt.grid(True)
+    plt.show()
+
+# 11. Splitting and fitting
+train, test = train_test_split(data, test_size=0.2, random_state=1)
+svm_fit = svm.SVC(kernel='rbf', gamma=0.05, C=100)
+svm_fit.fit(train[['x1', 'x2']], train['y'])
+
+if False:
+    xx, yy = np.meshgrid(np.linspace(-5,5, 100), np.linspace(-5,5, 100))
+    Z = svm_fit.decision_function(np.c_[xx.ravel(), yy.ravel()])
+    Z = Z.reshape(xx.shape)
+    plt.contourf(xx, yy, Z, levels=[-1, 0, 1], cmap=plt.cm.terrain, alpha=0.5)
+    plt.scatter(train['x1'], train['x2'], c=(3 - train['y']), marker='o', edgecolors='k')
+    #plt.xlim(-2.1, 2.5)
+    #plt.ylim(-1.3, 2.6)
+    plt.grid(True)
+    plt.show()
+
+print("Model Summary:\n", svm_fit)
+
+# Tuning hyperparameters
+if False:
+    tune_params_rbf = {'C': np.arange(0.1, 3.1, 0.2), 'gamma': np.arange(0.1, 5.1, 0.1)}
+    svm_tune_rbf = GridSearchCV(svm.SVC(kernel='rbf'), tune_params_rbf, cv=5)
+    svm_tune_rbf.fit(train[['x1', 'x2']], train['y'])
+
+    print("Best Model (RBF Kernel):\n", svm_tune_rbf.best_estimator_)
+    print("Best Parameters (RBF Kernel):\n", svm_tune_rbf.best_params_)
+
+    # 12. Prediction
+    pred_rbf = svm_tune_rbf.predict(test[['x1', 'x2']])
+    print("Confusion Matrix (RBF Kernel):\n", confusion_matrix(pred_rbf, test['y']))
+
+# 13. Multiclass
+np.random.seed(1)
+x = np.vstack([x, np.random.normal(size=(50, 2))])
+y = np.concatenate([y, np.zeros(50)])
+x = np.vstack([x, [[0, 0]]])
+y = np.concatenate([y, [2]])
+
+
+x[y == 0, 1] = x[y == 0, 1] + 2
+data_multiclass = pd.DataFrame({'x1': x[:, 0], 'x2': x[:, 1], 'y': y})
+
+if False:
+    plt.scatter(x[:, 0], x[:, 1], c=(3 - y), marker='o', edgecolors='k')
+    plt.grid(True)
+    plt.show()
+
+svm_fit_multiclass = svm.SVC(kernel='linear')
+svm_fit_multiclass.fit(data_multiclass[['x1', 'x2']], data_multiclass['y'])
+
+if True:
+    xx, yy = np.meshgrid(np.linspace(-5,5, 100), np.linspace(-5,5, 100))
+    # Plot the multiclass model fitted just above rather than the earlier binary
+    # RBF model. With more than two classes decision_function returns several
+    # values per sample and cannot be reshaped onto the grid, so the regions
+    # are coloured by predicted class label instead.
+    Z = svm_fit_multiclass.predict(np.c_[xx.ravel(), yy.ravel()])
+    print("xx.shape", xx.shape)
+    print(Z.shape)
+    Z = Z.reshape(xx.shape)
+    print(Z.shape)
+    # Level boundaries sit halfway between the class labels -1, 0, 1 and 2.
+    plt.contourf(xx, yy, Z, levels=[-1.5, -0.5, 0.5, 1.5, 2.5], cmap=plt.cm.terrain, alpha=0.5)
+    plt.scatter(x[:, 0], x[:, 1], c=(3 - y), marker='o', edgecolors='k')
+    plt.xlim(-5,5)
+    plt.ylim(-5,5)
+    plt.grid(True)
+    plt.show()
--- a/Lab2/6.1/main.py
+++ b/Lab2/6.1/main.py
@ -0,0 +1,133 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn import svm
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import cross_val_score
+from sklearn.metrics import accuracy_score, roc_curve, confusion_matrix
+from sklearn.decomposition import PCA
+
+train_df = pd.read_csv("satellite_train.csv")
+test_df = pd.read_csv("satellite_test.csv")
+
+X_train, X_test = train_df.drop(columns=["V37"]), test_df.drop(columns=["V37"])
+y_train, y_test = train_df["V37"], test_df["V37"]
+
+if False:
+    value_counts = y_train.value_counts(sort=False)
+    asc_index = sorted(value_counts.index)
+    asc_values = [value_counts[idx] for idx in asc_index ]
+
+    plt.bar(asc_index, asc_values, alpha=0.7)
+    plt.xticks(asc_index, labels=["red soil", "cotton crop", "grey soil","damp grey soil","soil with vegetation", "very damp grey soil"])
+    plt.xlabel("Values")
+    plt.ylabel("Frequency")
+    plt.show()
+
+#clf = svm.SVC(kernel='rbf', C=0.7, gamma=0.1)
+#clf = svm.SVC(kernel='linear')
+#clf.fit(X_train[["V1", "V2"]], y_train)
+
+#clf_tuned = GridSearchCV(svm.SVC(kernel='linear'), {'C': np.arange(0.1, 1.6, 0.2)}, cv=5)
+clf_tuned = svm.SVC(kernel='rbf',C=0.19, gamma=0.00024) #GridSearchCV(svm.SVC(kernel='rbf'), {'C': [0.17, 0.18, 0.19, 0.2], 'gamma': [0.00023, 0.00024, 0.00025, 0.00026, 0.00027]}, cv=5)
+clf_tuned.fit(X_train, y_train)
+pred = clf_tuned.predict(X_test)
+accuracy = accuracy_score(y_test, pred) * 100
+#print("Best Model (Linear Kernel):\n", clf_tuned.best_estimator_)
+#print("Best Parameters (Linear Kernel):\n", clf_tuned.best_params_)
+print(f"Accuracy: {accuracy:.2f}%")
+print("Confusion Matrix:\n", confusion_matrix(pred, y_test))
+print("10-fold cross-validation score: ", cross_val_score(clf_tuned, X_test, y_test, cv=10).mean())
+
+if False:
+    plt.figure(figsize=(8, 6))
+
+    # Plot the training points
+    plt.scatter(X_train["V1"], X_train["V2"], c=y_train, cmap=plt.cm.Paired, marker='.', s=20)
+
+    h = 0.2  # step size in the mesh
+    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
+    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
+    xx, yy = np.meshgrid(np.arange(x_min, x_max, 1), np.arange(y_min, y_max, 1))
+    Z = clf_tuned.predict(np.c_[xx.ravel(), yy.ravel()],)
+    Z = Z.reshape(xx.shape)
+
+    # Plot decision boundary and margins
+    plt.contour(xx, yy, Z, colors='k', alpha=0.5)
+
+    # Highlight the support vectors
+    #plt.scatter(clf_tuned.support_vectors_[:, 0], clf_tuned.support_vectors_[:, 1], s=20, linewidth=1, facecolors='none', edgecolors='k', marker='o', label='Support Vectors')
+
+    #plt.title('SVM Decision Boundary')
+    plt.xlabel('Feature V1')
+    plt.ylabel('Feature V2')
+    plt.show()
+
+if False:
+    # Apply PCA to reduce the dimensionality to 2D
+    pca = PCA(n_components=2)
+    X_2d = pca.fit_transform(X_train)
+    #print(pca.components_)
+
+    # Plot the 2D representation of the data
+    plt.figure(figsize=(8, 6))
+    plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_train, cmap=plt.cm.Paired, marker='.', s=20)
+    plt.title('2D Projection of High-Dimensional Data using PCA')
+    plt.xlabel('Principal Component 1')
+    plt.ylabel('Principal Component 2')
+    plt.show()
+
+if False:
+    tune_params_linear = {'C': np.arange(0.1, 2.1, 0.2)}
+    svm_tune_linear = GridSearchCV(svm.SVC(kernel='linear'), tune_params_linear, cv=10)
+    svm_tune_linear.fit(X_train, y_train)
+
+    print("Best Model (Linear Kernel):\n", svm_tune_linear.best_estimator_)
+    print("Best Parameters (Linear Kernel):\n", svm_tune_linear.best_params_)
+
+    plt.figure(figsize=(8, 6))
+    h = 0.2  # step size in the mesh
+    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
+    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
+    xx, yy = np.meshgrid(np.arange(x_min, x_max, 1), np.arange(y_min, y_max, 1))
+    Z = svm_tune_linear.predict(np.c_[xx.ravel(), yy.ravel()])
+
+    # Plot decision boundary and margins
+    #plt.contour(xx, yy, Z, colors='k', alpha=0.5)
+
+    # Highlight the support vectors
+    plt.scatter(svm_tune_linear.support_vectors_[:, 0], svm_tune_linear.support_vectors_[:, 1], s=20, linewidth=1, facecolors='none', edgecolors='k', marker='o', label='Support Vectors')
+
+    plt.title('SVM Decision Boundary')
+    plt.xlabel('Feature V1')
+    plt.ylabel('Feature V2')
+    plt.legend()
+    plt.show()
+
+if False:
+    tune_params_linear = {'C': np.arange(0.1, 2.1, 0.2), 'gamma': np.arange(0.1, 4.1, 0.1)}
+    svm_tune_rbf = GridSearchCV(svm.SVC(kernel='rbf'), tune_params_linear, cv=5)
+    svm_tune_rbf.fit(X_train, y_train)
+
+    print("Best Model (RBF Kernel):\n", svm_tune_rbf.best_estimator_)
+    print("Best Parameters (RBF Kernel):\n", svm_tune_rbf.best_params_)
+
+    plt.figure(figsize=(8, 6))
+    h = 0.2  # step size in the mesh
+    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
+    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
+    xx, yy = np.meshgrid(np.arange(x_min, x_max, 1), np.arange(y_min, y_max, 1))
+    Z = svm_tune_rbf.predict(np.c_[xx.ravel(), yy.ravel()],)
+
+    # Plot decision boundary and margins
+    plt.contour(xx, yy, Z, colors='k', alpha=0.5)
+
+    # Highlight the support vectors
+    plt.scatter(svm_tune_rbf.support_vectors_[:, 0], svm_tune_rbf.support_vectors_[:, 1], s=20, linewidth=1, facecolors='none', edgecolors='k', marker='o', label='Support Vectors')
+
+    plt.title('SVM Decision Boundary')
+    plt.xlabel('Feature V1')
+    plt.ylabel('Feature V2')
+    plt.legend()
+    plt.show()
--- a/Lab2/7.1/example.py
+++ b/Lab2/7.1/example.py
@ -0,0 +1,115 @@
+import h2o
+from h2o.estimators.deeplearning import H2ODeepLearningEstimator
+from h2o.grid.grid_search import H2OGridSearch
+import pandas as pd
+from sklearn.model_selection import train_test_split
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+# Initialize H2O cluster
+h2o.init(nthreads=-1, max_mem_size="2G")
+
+# Load data
+data = pd.read_csv("customer_churn.csv")
+#print(data.head())
+
+tenure_churned = data[data['Churn'] == 'Yes']['tenure']
+tenure_stayed = data[data['Churn'] == 'No']['tenure']
+
+if False:
+    plt.figure()
+    sns.kdeplot(tenure_churned, color='firebrick', linewidth=3, label='Churned')
+    sns.kdeplot(tenure_stayed, color='dodgerblue', linewidth=3, label='Stayed')
+
+    plt.title('Tenure Variable')
+    plt.xlabel('Number of months the customer is with the company')
+    plt.legend(loc='upper right')
+    plt.grid(True)
+    plt.show()
+
+# Split data
+seed = 1
+X_train, X_test, y_train, y_test = train_test_split(
+    data.drop("Churn", axis=1),  # Features
+    data["Churn"],  # Target variable
+    test_size=0.2,
+    random_state=seed
+)
+X_train['Churn']=y_train
+X_test['Churn']=y_test
+
+# Convert the pandas training DataFrame (features plus 'Churn') into an H2OFrame
+h2o_train = h2o.H2OFrame(X_train)
+
+# Convert the pandas test DataFrame (features plus 'Churn') into an H2OFrame
+h2o_test = h2o.H2OFrame(X_test)
+
+# Training the neural network
+dl_model = H2ODeepLearningEstimator(
+    activation="Rectifier",
+    hidden=[2,2],
+    loss="CrossEntropy",
+    score_each_iteration=True,
+    epochs=10000,
+    balance_classes=False,
+    rate=0.01,
+    adaptive_rate=False,
+    stopping_rounds=0,
+    classification_stop=-1
+)
+
+dl_model.train(x=h2o_train.columns[0:19], y="Churn", training_frame=h2o_train)
+
+if False:
+    plt.plot(dl_model.scoring_history().epochs,dl_model.scoring_history().training_logloss)
+    plt.xlabel('Epochs')
+    plt.ylabel('log_loss')
+    plt.grid()
+    plt.show()
+
+# Prediction
+prediction = dl_model.predict(h2o_test)
+prediction = prediction["predict"].as_data_frame()
+h2o_test_df = h2o_test.as_data_frame()
+
+# Calculate accuracy
+accuracy = (prediction['predict']== h2o_test_df["Churn"]).mean() * 100
+print(f"Accuracy: {accuracy:.2f}%")
+
+conf_matrix = pd.crosstab(prediction["predict"], h2o_test_df["Churn"], rownames=["Predicted"], colnames=["Actual"])
+conf_matrix_diag = conf_matrix.values.diagonal() / conf_matrix.sum(axis=0) * 100
+print("Confusion Matrix:")
+print(conf_matrix)
+print("Diagonal Percentages:")
+print(conf_matrix_diag)
+
+print("-------------- DEEPER --------------")
+
+# Deeper model
+dl_model_balanced = H2ODeepLearningEstimator(
+    activation="Rectifier",
+    hidden=[10,10,10],
+    loss="CrossEntropy",
+    score_each_iteration=True,
+    epochs=10000,
+    balance_classes=False,
+    rate=0.01,
+    adaptive_rate=False,
+    stopping_rounds=0,
+    classification_stop=-1
+)
+
+dl_model_balanced.train(x=h2o_train.columns[0:19], y="Churn", training_frame=h2o_train)
+
+# Training log-loss per epoch for the deeper model
+plt.plot(dl_model_balanced.scoring_history().epochs,dl_model_balanced.scoring_history().training_logloss)
+plt.xlabel('Epochs')
+plt.ylabel('log_loss')
+plt.grid()
+# Without show() the figure built above is never displayed when this runs as a script.
+plt.show()
+
+# Prediction
+prediction = dl_model_balanced.predict(h2o_test)
+prediction = prediction["predict"].as_data_frame()
+h2o_test_df = h2o_test.as_data_frame()
+# Calculate accuracy
+accuracy = (prediction['predict']== h2o_test_df["Churn"]).mean() * 100
+print(f"Accuracy: {accuracy:.2f}%")
--- a/Lab2/7.1/main.py
+++ b/Lab2/7.1/main.py
@ -0,0 +1,122 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+import h2o
+from h2o.estimators.deeplearning import H2ODeepLearningEstimator
+from h2o.grid.grid_search import H2OGridSearch
+
+train_df = pd.read_csv("arrhythmia_train.csv")
+test_df = pd.read_csv("arrhythmia_test.csv")
+
+#X_train, X_test = train_df.drop(columns=["arrhythmia"]), test_df.drop(columns=["arrhythmia"])
+#y_train, y_test = train_df["arrhythmia"], test_df["arrhythmia"]
+
+# 1.
+if False:
+    target_class = 0
+    max_count = 10
+    count = 0
+    cmap = plt.cm.get_cmap('hsv', max_count)
+    idx = 0
+    for i in range(len(X_train["X1"])):
+        if y_train[i] != target_class: continue
+        y = []
+        x = []
+
+        for j in range(187):
+            if X_train[f"X{j+1}"][i] == 0: break
+            x.append(idx)
+            y.append(X_train[f"X{j+1}"][i])
+            idx += 1
+
+        plt.plot(x, y, c=cmap(count))
+
+        count += 1
+        if count == max_count: break
+    plt.show()
+
+# 2.
+if False:
+    x_feature = "X20"
+    y_feature = "X50"
+
+    for point_class in y_test.unique():
+        plt.scatter(X_train[y_train == point_class][x_feature], X_train[y_train == point_class][y_feature], s=2, label=str(point_class))
+
+    plt.xlabel(x_feature)
+    plt.ylabel(y_feature)
+    plt.legend()
+    plt.show()
+
+# 3.
+if False:
+    value_counts = y_train.value_counts(sort=False)
+    asc_index = sorted(value_counts.index)
+    asc_values = [value_counts[idx] for idx in asc_index]
+
+    plt.bar(asc_index, asc_values, alpha=0.7)
+    plt.xticks(asc_index, labels=["normal (0)", "supraventricular (1)", "ventricular (2)","unknown (4)"])
+    plt.ylabel("Frequencies")
+    plt.title("Training dataset class distribution")
+    plt.show()
+
+if False:
+    value_counts = y_test.value_counts(sort=False)
+    asc_index = sorted(value_counts.index)
+    asc_values = [value_counts[idx] for idx in asc_index]
+
+    plt.bar(asc_index, asc_values, alpha=0.7)
+    plt.xticks(asc_index, labels=["normal (0)", "supraventricular (1)", "ventricular (2)","unknown (4)"])
+    plt.ylabel("Frequencies")
+    plt.title("Test dataset class distribution")
+    plt.show()
+
+# 4.
+if True:
+    # Initialize H2O cluster
+    h2o.init(nthreads=-1, max_mem_size="6G")
+
+    # Convert the pandas training DataFrame into an H2OFrame
+    h2o_train = h2o.H2OFrame(train_df)
+
+    # Convert the pandas test DataFrame into an H2OFrame
+    h2o_test = h2o.H2OFrame(test_df)
+
+    # Training the neural network
+    dl_model = H2ODeepLearningEstimator(
+        activation="tanh",
+        hidden=[50, 20],
+        input_dropout_ratio=0.2,
+        l1=1e-5,
+        epochs=20
+    )
+
+    dl_model.train(x=h2o_train.columns[:-1], y="arrhythmia", training_frame=h2o_train, validation_frame=h2o_test)
+
+    # Prediction
+    prediction = dl_model.predict(h2o_test)
+    prediction = prediction["predict"].as_data_frame()
+    h2o_test_df = h2o_test.as_data_frame()
+
+    # Build the confusion matrix and calculate accuracy
+    classess = [0,1,2,4]
+    # One row per expected class; the extra last column collects predictions
+    # that round to a value outside `classess`.
+    confusion_matrix = [[0] * (len(classess) + 1) for _ in classess]
+    for expected, predicted in zip(h2o_test_df["arrhythmia"], prediction['predict']):
+        # The prediction is handled as a number here, so round it to the nearest label.
+        actual = round(predicted)
+        row = confusion_matrix[classess.index(expected)]
+        column = classess.index(actual) if actual in classess else -1
+        row[column] += 1
+
+
+    # A prediction counts as correct when it lies within 0.5 of the true label.
+    accuracy = (abs(prediction['predict'] - h2o_test_df["arrhythmia"]) < 0.5).mean() * 100
+    print(f"Accuracy: {accuracy:.2f}%")
+
+    print("Confusion Matrix:")
+    print("Expected | Predicted")
+    print("         | 0 1 2 4 -1")
+    for i, row in enumerate(confusion_matrix):
+        print("       " + str(classess[i]) + " | "+ " ".join(f"{cell:5}" for cell in row))
--- a/Lab2/IF-1-1_Rokas_Puzonas.pdf
+++ b/Lab2/IF-1-1_Rokas_Puzonas.pdf