machine-learing-methods/Lab2/6.1/main.py

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, roc_curve, confusion_matrix
from sklearn.decomposition import PCA

# Load the pre-split satellite data; both paths are relative to the current
# working directory.
train_df = pd.read_csv("satellite_train.csv")
test_df = pd.read_csv("satellite_test.csv")

# Column "V37" is used as the target; every remaining column is a feature.
y_train = train_df["V37"]
y_test = test_df["V37"]
X_train = train_df.drop("V37", axis="columns")
X_test = test_df.drop("V37", axis="columns")

if False:
    # Disabled exploratory plot: bar chart of how many training samples carry
    # each target value, with the target values in ascending order.
    class_counts = y_train.value_counts(sort=False).sort_index()
    class_values = list(class_counts.index)
    class_names = [
        "red soil",
        "cotton crop",
        "grey soil",
        "damp grey soil",
        "soil with vegetation",
        "very damp grey soil",
    ]

    plt.bar(class_values, list(class_counts), alpha=0.7)
    plt.xticks(class_values, labels=class_names)
    plt.xlabel("Values")
    plt.ylabel("Frequency")
    plt.show()

# Final model: RBF-kernel SVM with fixed hyperparameters, trained on every
# feature column. The values appear to come from an earlier 5-fold grid search
# over C in [0.17, 0.18, 0.19, 0.2] and gamma in [0.00023 .. 0.00027] --
# TODO confirm before re-tuning.
clf_tuned = svm.SVC(kernel='rbf', C=0.19, gamma=0.00024)
clf_tuned.fit(X_train, y_train)

# Score the fitted model on the held-out test split.
pred = clf_tuned.predict(X_test)
accuracy = accuracy_score(y_test, pred) * 100
print(f"Accuracy: {accuracy:.2f}%")

# confusion_matrix takes (y_true, y_pred): rows are the true classes and
# columns are the predicted classes. Passing them swapped transposes the matrix.
print("Confusion Matrix:\n", confusion_matrix(y_test, pred))

# cross_val_score clones the estimator and refits it on each fold, so the
# model fitted above is not modified by this call.
# NOTE(review): the folds are drawn from the *test* split, not the training
# split -- confirm this is intentional.
print("10-fold cross-validation score: ", cross_val_score(clf_tuned, X_test, y_test, cv=10).mean())

if False:
    # Disabled visualisation: decision regions of the SVM on the V1/V2 plane,
    # drawn over the training points.
    #
    # clf_tuned is fitted on every column of X_train, so it cannot score a
    # two-column grid. Fit a separate classifier with the same hyperparameters
    # on V1/V2 only and plot that one instead.
    plane_features = ["V1", "V2"]
    clf_2d = svm.SVC(**clf_tuned.get_params())
    clf_2d.fit(X_train[plane_features], y_train)

    plt.figure(figsize=(8, 6))

    # Plot the training points, coloured by their target value
    plt.scatter(X_train["V1"], X_train["V2"], c=y_train, cmap=plt.cm.Paired, marker='.', s=20)

    # Unit-spaced grid spanning the observed V1/V2 range
    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 1), np.arange(y_min, y_max, 1))

    # Keep the column names so the grid matches what the classifier was fitted on
    grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=plane_features)
    Z = clf_2d.predict(grid).reshape(xx.shape)

    # Contour lines of the predicted class over the grid
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)

    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.show()

if False:
    # Disabled visualisation: reduce the training features to two principal
    # components and scatter them, coloured by target value.
    projection = PCA(n_components=2).fit_transform(X_train)
    first_component = projection[:, 0]
    second_component = projection[:, 1]

    plt.figure(figsize=(8, 6))
    plt.scatter(
        first_component,
        second_component,
        c=y_train,
        cmap=plt.cm.Paired,
        marker='.',
        s=20,
    )
    plt.title('2D Projection of High-Dimensional Data using PCA')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()

if False:
    # Disabled experiment: tune C for a linear-kernel SVM with a 10-fold grid
    # search on every feature column, then visualise a model with the winning
    # hyperparameters on the V1/V2 plane.
    tune_params_linear = {'C': np.arange(0.1, 2.1, 0.2)}
    svm_tune_linear = GridSearchCV(svm.SVC(kernel='linear'), tune_params_linear, cv=10)
    svm_tune_linear.fit(X_train, y_train)

    print("Best Model (Linear Kernel):\n", svm_tune_linear.best_estimator_)
    print("Best Parameters (Linear Kernel):\n", svm_tune_linear.best_params_)

    # The search result is fitted on every column of X_train, so it cannot
    # score a two-column grid. Refit the winning hyperparameters on V1/V2 only
    # and use that model for the plot. Support vectors are read from this SVC
    # directly, because GridSearchCV itself exposes no support_vectors_.
    plane_features = ["V1", "V2"]
    svm_linear_2d = svm.SVC(kernel='linear', **svm_tune_linear.best_params_)
    svm_linear_2d.fit(X_train[plane_features], y_train)

    plt.figure(figsize=(8, 6))

    # Unit-spaced grid spanning the observed V1/V2 range
    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 1), np.arange(y_min, y_max, 1))

    # Keep the column names so the grid matches what the classifier was fitted on
    grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=plane_features)
    Z = svm_linear_2d.predict(grid).reshape(xx.shape)

    # Contour lines of the predicted class over the grid
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)

    # Highlight the support vectors
    plt.scatter(svm_linear_2d.support_vectors_[:, 0], svm_linear_2d.support_vectors_[:, 1], s=20, linewidth=1, facecolors='none', edgecolors='k', marker='o', label='Support Vectors')

    plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.legend()
    plt.show()

if False:
    # Disabled experiment: tune C and gamma for an RBF-kernel SVM with a 5-fold
    # grid search on every feature column, then visualise a model with the
    # winning hyperparameters on the V1/V2 plane.
    tune_params_rbf = {'C': np.arange(0.1, 2.1, 0.2), 'gamma': np.arange(0.1, 4.1, 0.1)}
    svm_tune_rbf = GridSearchCV(svm.SVC(kernel='rbf'), tune_params_rbf, cv=5)
    svm_tune_rbf.fit(X_train, y_train)

    print("Best Model (RBF Kernel):\n", svm_tune_rbf.best_estimator_)
    print("Best Parameters (RBF Kernel):\n", svm_tune_rbf.best_params_)

    # The search result is fitted on every column of X_train, so it cannot
    # score a two-column grid. Refit the winning hyperparameters on V1/V2 only
    # and use that model for the plot. Support vectors are read from this SVC
    # directly, because GridSearchCV itself exposes no support_vectors_.
    plane_features = ["V1", "V2"]
    svm_rbf_2d = svm.SVC(kernel='rbf', **svm_tune_rbf.best_params_)
    svm_rbf_2d.fit(X_train[plane_features], y_train)

    plt.figure(figsize=(8, 6))

    # Unit-spaced grid spanning the observed V1/V2 range
    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 1), np.arange(y_min, y_max, 1))

    # Keep the column names so the grid matches what the classifier was fitted on.
    # contour needs Z with the same 2-D shape as xx/yy, hence the reshape.
    grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=plane_features)
    Z = svm_rbf_2d.predict(grid).reshape(xx.shape)

    # Contour lines of the predicted class over the grid
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)

    # Highlight the support vectors
    plt.scatter(svm_rbf_2d.support_vectors_[:, 0], svm_rbf_2d.support_vectors_[:, 1], s=20, linewidth=1, facecolors='none', edgecolors='k', marker='o', label='Support Vectors')

    plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.legend()
    plt.show()