from contextlib import contextmanager
import time

import graphviz
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    confusion_matrix,
    mean_squared_error,
)
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz


def load_data(csv_path="apple_quality_clean.csv", target="Quality",
              test_size=0.2, random_state=42):
    """Load the dataset and split it into train/test features and labels.

    Args:
        csv_path: Path of the CSV file to read.
        target: Name of the label column; every other column is a feature.
        test_size: Fraction of rows held out for testing.
        random_state: Seed passed to ``train_test_split`` so the split is
            reproducible.

    Returns:
        A 4-tuple ``(train_x, train_y, test_x, test_y)``.
    """
    apples = pd.read_csv(csv_path)
    train_apples, test_apples = train_test_split(
        apples, test_size=test_size, random_state=random_state
    )
    # print(f"Training size: {len(train_apples)}")
    # print(f"Test size: {len(test_apples)}")

    train_apples_x = train_apples.drop(columns=[target])
    train_apples_y = train_apples[target]
    test_apples_x = test_apples.drop(columns=[target])
    test_apples_y = test_apples[target]
    return train_apples_x, train_apples_y, test_apples_x, test_apples_y


def main_decision_tree(depths=None):
    """Fit one decision tree per maximum depth and plot accuracy and fit time.

    Prints the accuracy of the last depth tried and the best accuracy found
    (with the depth that produced it), then shows two side-by-side plots:
    test accuracy vs. depth and training time vs. depth.

    Args:
        depths: Iterable of ``max_depth`` values to try. Defaults to
            ``range(1, 19)``.

    Raises:
        ValueError: If ``depths`` is empty.
    """
    depths = list(range(1, 19)) if depths is None else list(depths)
    if not depths:
        raise ValueError("depths must contain at least one value")

    train_X, train_Y, test_X, test_Y = load_data()

    best_accuracy = 0
    best_depth = 0
    accuracies = []
    times = []

    for max_depth in depths:
        # Only the fit is timed; perf_counter is a monotonic, high-resolution
        # clock intended for measuring short durations.
        start_time = time.perf_counter()
        tree = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
        tree.fit(train_X, train_Y)
        total_time = time.perf_counter() - start_time

        y_prediction = tree.predict(test_X)
        accuracy = accuracy_score(test_Y, y_prediction)

        accuracies.append(accuracy)
        times.append(total_time)

        # Strict comparison: on ties the shallowest depth wins.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_depth = max_depth

    # Accuracy plot
    print("last accuracy ", accuracies[-1], depths[-1])
    print("best accuracy ", best_accuracy, best_depth)
    _, axs = plt.subplots(1, 2)
    axs[0].plot(depths, accuracies)
    axs[0].set(xlabel="Maksimalus gylis", ylabel="Tikslumas")
    axs[1].plot(depths, times)
    axs[1].set(xlabel="Maksimalus gylis", ylabel="Mokymosi trukmė")
    plt.show()

    # NOTE(review): the two experiments below are disabled. As written they
    # reference `test_apples_y` and `apples`, which are not defined in this
    # function (the test labels here are `test_Y`), and `tree` /
    # `y_prediction` hold the model from the last depth tried, not the best.
    #
    # Confusion matrix
    # cm = confusion_matrix(test_apples_y, y_prediction, labels=tree.classes_)
    # disp = ConfusionMatrixDisplay(confusion_matrix=cm,
    #                               display_labels=tree.classes_)
    # disp.plot()
    # plt.show()
    #
    # Visualize tree
    # class_names = apples["Quality"].unique().astype(str).tolist()
    # failas = export_graphviz(tree, out_file=None,
    #                          feature_names=apples.drop(columns=["Quality"]).columns,
    #                          class_names=class_names,
    #                          filled=True, rounded=True,
    #                          special_characters=True)
    # graph = graphviz.Source(failas)
    # graph.render()


def main_random_forest(n_estimators=5):
    """Fit a random forest, print its depth and accuracy, show a confusion matrix.

    Prints the largest depth among the forest's trees and the test accuracy,
    then displays the confusion matrix for the test predictions.

    Args:
        n_estimators: Number of trees in the forest.
    """
    train_X, train_Y, test_X, test_Y = load_data()

    forest = RandomForestClassifier(n_estimators=n_estimators, random_state=42)
    forest.fit(train_X, train_Y)

    y_prediction = forest.predict(test_X)
    accuracy = accuracy_score(test_Y, y_prediction)

    print(max(tree.get_depth() for tree in forest.estimators_))
    print(accuracy)

    cm = confusion_matrix(test_Y, y_prediction, labels=forest.classes_)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                  display_labels=forest.classes_)
    disp.plot()
    plt.show()

    # NOTE(review): disabled experiment - accuracy as a function of forest
    # size. `range(3, 9+1, 5)` only yields the sizes 3 and 8.
    #
    # best_accuracy = 0
    # best_forest_size = 0
    # accuracies = []
    # forest_sizes = list(range(3, 9+1, 5))
    # for forest_size in forest_sizes:
    #     forest = RandomForestClassifier(forest_size, random_state=42)
    #     forest.fit(train_X, train_Y)
    #     y_prediction = forest.predict(test_X)
    #     accuracy = accuracy_score(test_Y, y_prediction)
    #     accuracies.append(accuracy)
    #     if accuracy > best_accuracy:
    #         best_accuracy = accuracy
    #         best_forest_size = forest_size
    # print("best", best_accuracy, best_forest_size)
    # plt.plot(forest_sizes, accuracies)
    # plt.xlabel("Medžių kiekis")
    # plt.ylabel("Tikslumas")
    # plt.show()


# Run the experiment only when executed as a script, not on import.
if __name__ == "__main__":
    main_random_forest()