1
0
intelektikos-pagrindai/lab2.py
2024-03-24 21:13:24 +02:00

118 lines
3.9 KiB
Python

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score, confusion_matrix, mean_squared_error
from contextlib import contextmanager
import time
from sklearn.tree import export_graphviz
import graphviz
from sklearn.tree import DecisionTreeClassifier
def load_data(
    path: str = "apple_quality_clean.csv",
    test_size: float = 0.2,
    random_state: int = 42,
    target: str = "Quality",
):
    """Read the dataset from a CSV file and split it into train/test parts.

    The whole table is split first with ``train_test_split`` and the label
    column is separated from the feature columns afterwards, so rows of
    features and labels stay aligned within each part.

    Args:
        path: CSV file passed to ``pandas.read_csv``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``
            so the split is reproducible between runs.
        target: Name of the column holding the class label; every other
            column is treated as a feature.

    Returns:
        A 4-tuple ``(train_x, train_y, test_x, test_y)`` where the ``*_x``
        items are the frames without the ``target`` column and the ``*_y``
        items are the ``target`` column of the matching part.
    """
    apples = pd.read_csv(path)
    train_apples, test_apples = train_test_split(
        apples, test_size=test_size, random_state=random_state
    )

    train_apples_x = train_apples.drop(columns=[target])
    train_apples_y = train_apples[target]
    test_apples_x = test_apples.drop(columns=[target])
    test_apples_y = test_apples[target]
    return train_apples_x, train_apples_y, test_apples_x, test_apples_y
def main_decision_tree():
    """Sweep decision-tree depth limits and plot accuracy and fit time.

    For every ``max_depth`` from 1 to 18 a ``DecisionTreeClassifier`` is
    fitted on the training part returned by ``load_data()`` and scored with
    ``accuracy_score`` on the test part. The accuracy of the last (deepest)
    setting and the best accuracy together with its depth are printed, then
    accuracy and fit duration are plotted against the depth limit on two
    side-by-side axes.
    """
    train_X, train_Y, test_X, test_Y = load_data()

    best_accuracy = 0
    best_depth = 0
    accuracies = []
    times = []
    depths = list(range(1, 19))

    for max_depth in depths:
        tree = DecisionTreeClassifier(max_depth=max_depth, random_state=42)

        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring short intervals; time.time() can be coarse or jump when
        # the system clock is adjusted. Only fit() is timed so the value is
        # the training duration the plot label promises.
        start_time = time.perf_counter()
        tree.fit(train_X, train_Y)
        total_time = time.perf_counter() - start_time

        y_prediction = tree.predict(test_X)
        accuracy = accuracy_score(test_Y, y_prediction)
        accuracies.append(accuracy)
        times.append(total_time)

        # Strict comparison: on ties the shallowest depth is kept.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_depth = max_depth

    print("last accuracy ", accuracies[-1], depths[-1])
    print("best accuracy ", best_accuracy, best_depth)

    # Left axes: accuracy per depth limit; right axes: fit duration.
    _, axs = plt.subplots(1, 2)
    axs[0].plot(depths, accuracies)
    axs[0].set(xlabel="Maksimalus gylis", ylabel="Tikslumas")
    axs[1].plot(depths, times)
    axs[1].set(xlabel="Maksimalus gylis", ylabel="Mokymosi trukmė")
    plt.show()
# Confusion matrix
# cm = confusion_matrix(test_apples_y, y_prediction, labels=tree.classes_)
# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=tree.classes_)
# disp.plot()
# plt.show()
# Visualize tree
# class_names = apples["Quality"].unique().astype(str).tolist()
# failas = export_graphviz(tree, out_file=None,
# feature_names=apples.drop(columns=["Quality"]).columns,
# class_names=class_names,
# filled=True, rounded=True,
# special_characters=True)
# graph = graphviz.Source(failas)
# graph.render()
def main_random_forest():
    """Fit a five-tree random forest and report its test-set results.

    Prints the depth of the deepest tree in the fitted ensemble, then the
    accuracy on the test part returned by ``load_data()``, and finally
    shows the confusion matrix for the test predictions.
    """
    features_train, labels_train, features_test, labels_test = load_data()

    model = RandomForestClassifier(n_estimators=5, random_state=42)
    model.fit(features_train, labels_train)
    predicted = model.predict(features_test)

    # Depth of each individual tree in the ensemble; only the maximum is reported.
    tree_depths = [estimator.get_depth() for estimator in model.estimators_]
    print(max(tree_depths))
    print(accuracy_score(labels_test, predicted))

    matrix = confusion_matrix(labels_test, predicted, labels=model.classes_)
    display = ConfusionMatrixDisplay(
        confusion_matrix=matrix,
        display_labels=model.classes_,
    )
    display.plot()
    plt.show()
# best_accuracy = 0
# best_forest_size = 0
# accuracies = []
# forest_sizes = list(range(3, 9+1, 5))
# for forest_size in forest_sizes:
# forest = RandomForestClassifier(forest_size, random_state=42)
# forest.fit(train_X, train_Y)
# y_prediction = forest.predict(test_X)
# accuracy = accuracy_score(test_Y, y_prediction)
# accuracies.append(accuracy)
# if accuracy > best_accuracy:
# best_accuracy = accuracy
# best_forest_size = forest_size
# print("best", best_accuracy, best_forest_size)
# plt.plot(forest_sizes, accuracies)
# plt.xlabel("Medžių kiekis")
# plt.ylabel("Tikslumas")
# plt.show()
# Run the random-forest experiment. This call sits at module top level, so it
# executes whenever the file is run as a script or imported as a module.
main_random_forest()