machine-learing-methods/Lab2/6.1/main.py
2023-12-06 00:00:07 +02:00

133 lines
5.4 KiB
Python

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, roc_curve, confusion_matrix
from sklearn.decomposition import PCA
# Load the pre-split satellite data. Column "V37" is used as the class label;
# every other column is used as a feature.
train_df = pd.read_csv("satellite_train.csv")
test_df = pd.read_csv("satellite_test.csv")

X_train = train_df.drop(columns=["V37"])
X_test = test_df.drop(columns=["V37"])

y_train = train_df["V37"]
y_test = test_df["V37"]
if False:
    # Disabled exploratory plot: bar chart of how many training samples
    # carry each class label, with the classes in ascending order.
    class_counts = y_train.value_counts(sort=False).sort_index()
    class_ids = list(class_counts.index)
    frequencies = list(class_counts)

    plt.bar(class_ids, frequencies, alpha=0.7)
    # One human-readable name per tick, in the same ascending class order.
    plt.xticks(
        class_ids,
        labels=[
            "red soil",
            "cotton crop",
            "grey soil",
            "damp grey soil",
            "soil with vegetation",
            "very damp grey soil",
        ],
    )
    plt.xlabel("Values")
    plt.ylabel("Frequency")
    plt.show()
# Earlier model variants, kept for reference:
#clf = svm.SVC(kernel='rbf', C=0.7, gamma=0.1)
#clf = svm.SVC(kernel='linear')
#clf.fit(X_train[["V1", "V2"]], y_train)
#clf_tuned = GridSearchCV(svm.SVC(kernel='linear'), {'C': np.arange(0.1, 1.6, 0.2)}, cv=5)

# RBF-kernel SVM with fixed hyperparameters. The grid search it replaces:
#clf_tuned = GridSearchCV(svm.SVC(kernel='rbf'), {'C': [0.17, 0.18, 0.19, 0.2], 'gamma': [0.00023, 0.00024, 0.00025, 0.00026, 0.00027]}, cv=5)
clf_tuned = svm.SVC(kernel='rbf', C=0.19, gamma=0.00024)
clf_tuned.fit(X_train, y_train)

# Evaluate on the held-out test split.
pred = clf_tuned.predict(X_test)
accuracy = accuracy_score(y_test, pred) * 100

# Only meaningful when clf_tuned is a GridSearchCV instance:
#print("Best Model (Linear Kernel):\n", clf_tuned.best_estimator_)
#print("Best Parameters (Linear Kernel):\n", clf_tuned.best_params_)
print(f"Accuracy: {accuracy:.2f}%")

# confusion_matrix expects (y_true, y_pred): rows are the true classes and
# columns the predicted ones. Passing the predictions first transposes it.
print("Confusion Matrix:\n", confusion_matrix(y_test, pred))

# NOTE(review): this cross-validates on the *test* split (the estimator is
# re-fitted on folds of X_test). Cross-validation is usually run on the
# training data -- confirm this is intentional.
print("10-fold cross-validation score: ", cross_val_score(clf_tuned, X_test, y_test, cv=10).mean())
if False:
    # Disabled visualisation: training points and the SVM decision boundary
    # in the (V1, V2) plane.
    #
    # clf_tuned is fitted on every feature column, so it cannot predict on a
    # two-column mesh. A copy with the same hyperparameters is fitted on just
    # the two plotted features and used for the boundary instead.
    plot_features = ["V1", "V2"]
    X_plot = X_train[plot_features]
    boundary_clf = svm.SVC(**clf_tuned.get_params())
    boundary_clf.fit(X_plot, y_train)

    plt.figure(figsize=(8, 6))
    # Plot the training points
    plt.scatter(X_plot["V1"], X_plot["V2"], c=y_train, cmap=plt.cm.Paired, marker='.', s=20)

    step = 1  # step size in the mesh
    x_min, x_max = X_plot["V1"].min(), X_plot["V1"].max()
    y_min, y_max = X_plot["V2"].min(), X_plot["V2"].max()
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))

    # Predict with named columns so the input matches what the model was
    # fitted on, then fold the flat predictions back into the mesh shape.
    mesh = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=plot_features)
    Z = boundary_clf.predict(mesh).reshape(xx.shape)

    # Plot decision boundary and margins
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)
    # Highlight the support vectors
    #plt.scatter(boundary_clf.support_vectors_[:, 0], boundary_clf.support_vectors_[:, 1], s=20, linewidth=1, facecolors='none', edgecolors='k', marker='o', label='Support Vectors')
    #plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.show()
if False:
    # Disabled exploratory plot: project the training features onto their
    # first two principal components and colour each point by class label.
    projector = PCA(n_components=2)
    projected = projector.fit_transform(X_train)
    #print(projector.components_)

    first_pc = projected[:, 0]
    second_pc = projected[:, 1]

    plt.figure(figsize=(8, 6))
    plt.scatter(first_pc, second_pc, c=y_train, cmap=plt.cm.Paired, marker='.', s=20)
    plt.title('2D Projection of High-Dimensional Data using PCA')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()
if False:
    # Disabled experiment: 10-fold grid search over C for a linear-kernel
    # SVM, followed by a scatter plot of the best model's support vectors.
    linear_grid = {'C': np.arange(0.1, 2.1, 0.2)}
    svm_tune_linear = GridSearchCV(svm.SVC(kernel='linear'), linear_grid, cv=10)
    svm_tune_linear.fit(X_train, y_train)
    print("Best Model (Linear Kernel):\n", svm_tune_linear.best_estimator_)
    print("Best Parameters (Linear Kernel):\n", svm_tune_linear.best_params_)

    plt.figure(figsize=(8, 6))

    # The mesh prediction that used to sit here was dropped: its result was
    # never drawn (the contour call was commented out), and predicting on a
    # two-column mesh fails for a model fitted on every feature column.
    # Drawing a boundary needs a model fitted on the two plotted features only.
    #plt.contour(xx, yy, Z, colors='k', alpha=0.5)

    # Highlight the support vectors. GridSearchCV does not expose
    # support_vectors_ itself; they live on the refitted best estimator.
    # NOTE(review): columns 0 and 1 are assumed to be V1 and V2 -- confirm
    # against the CSV column order.
    support_vectors = svm_tune_linear.best_estimator_.support_vectors_
    plt.scatter(support_vectors[:, 0], support_vectors[:, 1], s=20, linewidth=1, facecolors='none', edgecolors='k', marker='o', label='Support Vectors')
    plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.legend()
    plt.show()
if False:
    # Disabled experiment: 5-fold grid search over C and gamma for an
    # RBF-kernel SVM on all features, followed by a decision-boundary plot
    # in the (V1, V2) plane.
    tune_params_rbf = {'C': np.arange(0.1, 2.1, 0.2), 'gamma': np.arange(0.1, 4.1, 0.1)}
    svm_tune_rbf = GridSearchCV(svm.SVC(kernel='rbf'), tune_params_rbf, cv=5)
    svm_tune_rbf.fit(X_train, y_train)
    print("Best Model (RBF Kernel):\n", svm_tune_rbf.best_estimator_)
    print("Best Parameters (RBF Kernel):\n", svm_tune_rbf.best_params_)

    # The tuned model is fitted on every feature column, so it cannot predict
    # on a two-column mesh. Fit a model with the best parameters found on
    # just the two plotted features and use that for the picture.
    plot_features = ["V1", "V2"]
    X_plot = X_train[plot_features]
    boundary_clf = svm.SVC(kernel='rbf', **svm_tune_rbf.best_params_)
    boundary_clf.fit(X_plot, y_train)

    plt.figure(figsize=(8, 6))
    step = 1  # step size in the mesh
    x_min, x_max = X_plot["V1"].min(), X_plot["V1"].max()
    y_min, y_max = X_plot["V2"].min(), X_plot["V2"].max()
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))

    # plt.contour needs Z as a 2-D array with the same shape as the mesh, so
    # the flat predictions are reshaped before plotting.
    mesh = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=plot_features)
    Z = boundary_clf.predict(mesh).reshape(xx.shape)

    # Plot decision boundary and margins
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)
    # Highlight the support vectors of the plotted two-feature model.
    # GridSearchCV itself has no support_vectors_ attribute.
    support_vectors = boundary_clf.support_vectors_
    plt.scatter(support_vectors[:, 0], support_vectors[:, 1], s=20, linewidth=1, facecolors='none', edgecolors='k', marker='o', label='Support Vectors')
    plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.legend()
    plt.show()