# Python source, 133 lines, 5.4 KiB (file-viewer header captured during extraction)
import numpy as np
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
from sklearn import svm
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.model_selection import GridSearchCV
|
|
from sklearn.model_selection import cross_val_score
|
|
from sklearn.metrics import accuracy_score, roc_curve, confusion_matrix
|
|
from sklearn.decomposition import PCA
|
|
|
|
# Read the train and test CSV files; column "V37" is used as the target
# and every remaining column as a feature.
train_df = pd.read_csv("satellite_train.csv")
test_df = pd.read_csv("satellite_test.csv")

# Training split: features and labels.
X_train = train_df.drop(columns=["V37"])
y_train = train_df["V37"]

# Test split: features and labels.
X_test = test_df.drop(columns=["V37"])
y_test = test_df["V37"]
|
|
|
|
if False:
    # Disabled exploratory plot: bar chart of how often each label occurs in
    # the training split, with bars ordered by ascending label value.
    label_counts = y_train.value_counts(sort=False)
    ordered_labels = sorted(label_counts.index)
    bar_heights = [label_counts[label] for label in ordered_labels]

    # Human-readable tick names, listed in the same ascending-label order.
    # NOTE(review): assumes exactly six distinct labels in y_train -- confirm.
    tick_names = [
        "red soil",
        "cotton crop",
        "grey soil",
        "damp grey soil",
        "soil with vegetation",
        "very damp grey soil",
    ]

    plt.bar(ordered_labels, bar_heights, alpha=0.7)
    plt.xticks(ordered_labels, labels=tick_names)
    plt.xlabel("Values")
    plt.ylabel("Frequency")
    plt.show()
|
|
|
|
# Final model: RBF-kernel SVM trained on every feature column.
# NOTE(review): C and gamma appear to come from an earlier 5-fold GridSearchCV
# over C in [0.17, 0.2] and gamma in [0.00023, 0.00027] -- confirm before
# changing them.
clf_tuned = svm.SVC(kernel='rbf', C=0.19, gamma=0.00024)
clf_tuned.fit(X_train, y_train)

# Hold-out evaluation on the test split.
pred = clf_tuned.predict(X_test)
accuracy = accuracy_score(y_test, pred) * 100
print(f"Accuracy: {accuracy:.2f}%")

# confusion_matrix takes (y_true, y_pred): rows are the true classes and
# columns the predicted ones. Passing them the other way round transposes
# the matrix.
print("Confusion Matrix:\n", confusion_matrix(y_test, pred))

# Cross-validate on the training split. cross_val_score refits a clone of the
# estimator on each fold, so running it on the test split would train on test
# data and say nothing about the model fitted above.
cv_scores = cross_val_score(clf_tuned, X_train, y_train, cv=10)
print("10-fold cross-validation score: ", cv_scores.mean())
|
|
|
|
if False:
    # Disabled visualisation: SVM decision boundary in the (V1, V2) plane.
    #
    # clf_tuned is fitted on every column of X_train, so it cannot predict on
    # a two-column mesh. A separate classifier with the same hyperparameters
    # is fitted on V1 and V2 only and used for the plot.
    plane_features = ["V1", "V2"]
    clf_plane = svm.SVC(**clf_tuned.get_params())
    clf_plane.fit(X_train[plane_features], y_train)

    plt.figure(figsize=(8, 6))

    # Plot the training points, coloured by label.
    plt.scatter(X_train["V1"], X_train["V2"], c=y_train, cmap=plt.cm.Paired, marker='.', s=20)

    # Regular grid spanning the observed V1/V2 range.
    mesh_step = 1
    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, mesh_step),
        np.arange(y_min, y_max, mesh_step),
    )

    # Keep the column names so the mesh matches what the classifier was fitted on.
    mesh_points = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=plane_features)
    Z = clf_plane.predict(mesh_points).reshape(xx.shape)

    # Contour lines separate regions that receive different predicted labels.
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)

    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.show()
|
|
|
|
if False:
    # Disabled exploratory plot: project the training features onto their
    # first two principal components and colour each point by its label.
    projector = PCA(n_components=2)
    projected = projector.fit_transform(X_train)
    first_component = projected[:, 0]
    second_component = projected[:, 1]

    plt.figure(figsize=(8, 6))
    plt.scatter(
        first_component,
        second_component,
        c=y_train,
        cmap=plt.cm.Paired,
        marker='.',
        s=20,
    )
    plt.title('2D Projection of High-Dimensional Data using PCA')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()
|
|
|
|
if False:
    # Disabled experiment: 10-fold grid search over C for a linear-kernel SVM
    # trained on every feature column.
    tune_params_linear = {'C': np.arange(0.1, 2.1, 0.2)}
    svm_tune_linear = GridSearchCV(svm.SVC(kernel='linear'), tune_params_linear, cv=10)
    svm_tune_linear.fit(X_train, y_train)

    print("Best Model (Linear Kernel):\n", svm_tune_linear.best_estimator_)
    print("Best Parameters (Linear Kernel):\n", svm_tune_linear.best_params_)

    # The searched model uses every feature, so it cannot predict on a
    # two-column mesh, and GridSearchCV itself has no support_vectors_
    # attribute. For the 2-D picture, refit an SVC with the winning
    # hyperparameters on V1 and V2 only and plot that model.
    plane_features = ["V1", "V2"]
    svm_plane_linear = svm.SVC(**svm_tune_linear.best_estimator_.get_params())
    svm_plane_linear.fit(X_train[plane_features], y_train)

    plt.figure(figsize=(8, 6))

    # Regular grid spanning the observed V1/V2 range.
    mesh_step = 1
    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, mesh_step),
        np.arange(y_min, y_max, mesh_step),
    )
    mesh_points = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=plane_features)

    # plt.contour needs a 2-D array shaped like the mesh.
    Z = svm_plane_linear.predict(mesh_points).reshape(xx.shape)

    # Contour lines separate regions that receive different predicted labels.
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)

    # Highlight the support vectors of the two-feature model.
    support_vectors = svm_plane_linear.support_vectors_
    plt.scatter(
        support_vectors[:, 0],
        support_vectors[:, 1],
        s=20,
        linewidth=1,
        facecolors='none',
        edgecolors='k',
        marker='o',
        label='Support Vectors',
    )

    plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.legend()
    plt.show()
|
|
|
|
if False:
    # Disabled experiment: 5-fold grid search over C and gamma for an
    # RBF-kernel SVM trained on every feature column.
    tune_params_rbf = {'C': np.arange(0.1, 2.1, 0.2), 'gamma': np.arange(0.1, 4.1, 0.1)}
    svm_tune_rbf = GridSearchCV(svm.SVC(kernel='rbf'), tune_params_rbf, cv=5)
    svm_tune_rbf.fit(X_train, y_train)

    print("Best Model (RBF Kernel):\n", svm_tune_rbf.best_estimator_)
    print("Best Parameters (RBF Kernel):\n", svm_tune_rbf.best_params_)

    # The searched model uses every feature, so it cannot predict on a
    # two-column mesh, and GridSearchCV itself has no support_vectors_
    # attribute. For the 2-D picture, refit an SVC with the winning
    # hyperparameters on V1 and V2 only and plot that model.
    plane_features = ["V1", "V2"]
    svm_plane_rbf = svm.SVC(**svm_tune_rbf.best_estimator_.get_params())
    svm_plane_rbf.fit(X_train[plane_features], y_train)

    plt.figure(figsize=(8, 6))

    # Regular grid spanning the observed V1/V2 range.
    mesh_step = 1
    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, mesh_step),
        np.arange(y_min, y_max, mesh_step),
    )
    mesh_points = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=plane_features)

    # plt.contour needs a 2-D array shaped like the mesh; the flat prediction
    # vector must be reshaped before plotting.
    Z = svm_plane_rbf.predict(mesh_points).reshape(xx.shape)

    # Contour lines separate regions that receive different predicted labels.
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)

    # Highlight the support vectors of the two-feature model.
    support_vectors = svm_plane_rbf.support_vectors_
    plt.scatter(
        support_vectors[:, 0],
        support_vectors[:, 1],
        s=20,
        linewidth=1,
        facecolors='none',
        edgecolors='k',
        marker='o',
        label='Support Vectors',
    )

    plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.legend()
    plt.show()