"""Train and evaluate an RBF-kernel SVM on the satellite data set.

Reads ``satellite_train.csv`` and ``satellite_test.csv``, treats column
``V37`` as the class label and every other column as a feature, fits an
``SVC`` with fixed hyper-parameters and prints the test accuracy, the
confusion matrix and a 10-fold cross-validation score.

The ``if False:`` sections are exploratory plots / grid searches that are
switched off; flip a guard to ``True`` to run that section.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, roc_curve, confusion_matrix
from sklearn.decomposition import PCA

train_df = pd.read_csv("satellite_train.csv")
test_df = pd.read_csv("satellite_test.csv")

# Column "V37" holds the class label; all remaining columns are features.
X_train, X_test = train_df.drop(columns=["V37"]), test_df.drop(columns=["V37"])
y_train, y_test = train_df["V37"], test_df["V37"]

# --- Optional: bar chart of the class frequencies in the training labels ---
if False:
    value_counts = y_train.value_counts(sort=False)
    asc_index = sorted(value_counts.index)
    asc_values = [value_counts[idx] for idx in asc_index]
    plt.bar(asc_index, asc_values, alpha=0.7)
    plt.xticks(
        asc_index,
        labels=["red soil", "cotton crop", "grey soil", "damp grey soil",
                "soil with vegetation", "very damp grey soil"],
    )
    plt.xlabel("Values")
    plt.ylabel("Frequency")
    plt.show()

# --- Model fit and evaluation ---
# Earlier configurations, kept for reference:
#clf = svm.SVC(kernel='rbf', C=0.7, gamma=0.1)
#clf = svm.SVC(kernel='linear')
#clf.fit(X_train[["V1", "V2"]], y_train)
#clf_tuned = GridSearchCV(svm.SVC(kernel='linear'), {'C': np.arange(0.1, 1.6, 0.2)}, cv=5)
#clf_tuned = GridSearchCV(svm.SVC(kernel='rbf'), {'C': [0.17, 0.18, 0.19, 0.2], 'gamma': [0.00023, 0.00024, 0.00025, 0.00026, 0.00027]}, cv=5)
clf_tuned = svm.SVC(kernel='rbf', C=0.19, gamma=0.00024)
clf_tuned.fit(X_train, y_train)

pred = clf_tuned.predict(X_test)
accuracy = accuracy_score(y_test, pred) * 100

# Only meaningful when clf_tuned is a GridSearchCV instance:
#print("Best Model (Linear Kernel):\n", clf_tuned.best_estimator_)
#print("Best Parameters (Linear Kernel):\n", clf_tuned.best_params_)
print(f"Accuracy: {accuracy:.2f}%")
# confusion_matrix takes (y_true, y_pred): rows are the true classes and
# columns the predicted ones.  The arguments used to be swapped, which
# printed the transposed matrix.
print("Confusion Matrix:\n", confusion_matrix(y_test, pred))
# NOTE(review): this cross-validates on the *test* split (the model is refit
# on folds of X_test), not on the training data -- confirm that is intended.
print("10-fold cross-validation score: ",
      cross_val_score(clf_tuned, X_test, y_test, cv=10).mean())

# --- Optional: decision boundary of clf_tuned in the V1/V2 plane ---
if False:
    plt.figure(figsize=(8, 6))
    # Plot the training points
    plt.scatter(X_train["V1"], X_train["V2"], c=y_train,
                cmap=plt.cm.Paired, marker='.', s=20)
    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
    # Evaluation grid over the V1/V2 range with a step of 1.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 1),
                         np.arange(y_min, y_max, 1))
    # NOTE(review): clf_tuned is fit on every feature column above, so
    # predicting on this two-column grid raises unless the model is refit on
    # X_train[["V1", "V2"]] first -- resolve before enabling this section.
    Z = clf_tuned.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot decision boundary and margins
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)
    # Highlight the support vectors
    #plt.scatter(clf_tuned.support_vectors_[:, 0], clf_tuned.support_vectors_[:, 1], s=20, linewidth=1, facecolors='none', edgecolors='k', marker='o', label='Support Vectors')
    #plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.show()

# --- Optional: 2-D PCA projection of the training features ---
if False:
    # Apply PCA to reduce the dimensionality to 2D
    pca = PCA(n_components=2)
    X_2d = pca.fit_transform(X_train)
    #print(pca.components_)
    # Plot the 2D representation of the data
    plt.figure(figsize=(8, 6))
    plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_train,
                cmap=plt.cm.Paired, marker='.', s=20)
    plt.title('2D Projection of High-Dimensional Data using PCA')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()

# --- Optional: grid search over C for a linear-kernel SVM ---
if False:
    tune_params_linear = {'C': np.arange(0.1, 2.1, 0.2)}
    svm_tune_linear = GridSearchCV(svm.SVC(kernel='linear'),
                                   tune_params_linear, cv=10)
    svm_tune_linear.fit(X_train, y_train)
    print("Best Model (Linear Kernel):\n", svm_tune_linear.best_estimator_)
    print("Best Parameters (Linear Kernel):\n", svm_tune_linear.best_params_)

    plt.figure(figsize=(8, 6))
    # The decision-boundary contour was already disabled in this section, so
    # the mesh and the prediction that only fed it are no longer computed.
    # Highlight the support vectors.  GridSearchCV does not expose
    # support_vectors_ itself; it lives on the refitted best estimator.
    # Columns 0 and 1 are presumably V1 and V2 -- TODO confirm column order.
    support_vectors = svm_tune_linear.best_estimator_.support_vectors_
    plt.scatter(support_vectors[:, 0], support_vectors[:, 1], s=20,
                linewidth=1, facecolors='none', edgecolors='k', marker='o',
                label='Support Vectors')
    plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.legend()
    plt.show()

# --- Optional: grid search over C and gamma for an RBF-kernel SVM ---
if False:
    tune_params_rbf = {'C': np.arange(0.1, 2.1, 0.2),
                       'gamma': np.arange(0.1, 4.1, 0.1)}
    svm_tune_rbf = GridSearchCV(svm.SVC(kernel='rbf'), tune_params_rbf, cv=5)
    svm_tune_rbf.fit(X_train, y_train)
    print("Best Model (RBF Kernel):\n", svm_tune_rbf.best_estimator_)
    print("Best Parameters (RBF Kernel):\n", svm_tune_rbf.best_params_)

    plt.figure(figsize=(8, 6))
    x_min, x_max = X_train["V1"].min(), X_train["V1"].max()
    y_min, y_max = X_train["V2"].min(), X_train["V2"].max()
    # Evaluation grid over the V1/V2 range with a step of 1.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 1),
                         np.arange(y_min, y_max, 1))
    # NOTE(review): the search is fit on every feature column, so predicting
    # on this two-column grid raises unless a model fit on
    # X_train[["V1", "V2"]] is used -- resolve before enabling this section.
    Z = svm_tune_rbf.predict(np.c_[xx.ravel(), yy.ravel()])
    # contour needs Z with the same 2-D shape as the mesh.
    Z = Z.reshape(xx.shape)
    # Plot decision boundary and margins
    plt.contour(xx, yy, Z, colors='k', alpha=0.5)
    # Highlight the support vectors (taken from the refitted best estimator;
    # GridSearchCV itself has no support_vectors_ attribute).
    support_vectors = svm_tune_rbf.best_estimator_.support_vectors_
    plt.scatter(support_vectors[:, 0], support_vectors[:, 1], s=20,
                linewidth=1, facecolors='none', edgecolors='k', marker='o',
                label='Support Vectors')
    plt.title('SVM Decision Boundary')
    plt.xlabel('Feature V1')
    plt.ylabel('Feature V2')
    plt.legend()
    plt.show()