import pandas as pd import matplotlib.pyplot as plt import matplotlib.patches as mpatches from sklearn.linear_model import LinearRegression import numpy as np # Warning, this is not great code. Just something hacked together for the report. def create_dataset(sunspots, n = 2): P = [] T = [] for i in range(len(sunspots) - n): P.append(np.array(sunspots["activity"][i:(i+n)])) T.append(sunspots["activity"][i+n]) return P, T # 1. & 2. sunspots = pd.read_csv("sunspot.txt", delimiter='\t', header=None, names=["year", "activity"]) # 4. if False: plt.plot(sunspots['year'], sunspots["activity"]) plt.xlabel("Metai") plt.ylabel("Aktyvumas") plt.title(f"Saulės aktyvumas tarp {min(sunspots['year'])} ir {max(sunspots['year'])} metų") plt.show() # 5. P, T = create_dataset(sunspots) # 6. def draw_scatterplot(Xs, Ys, marker = 'o'): ax = plt.subplot(projection='3d') X_coords = [] Y_coords = [] Z_coords = [] for i in range(len(Xs)): X_coords.append(Xs[i][0]) Y_coords.append(Xs[i][1]) Z_coords.append(Ys[i]) ax.scatter(X_coords, Y_coords, Z_coords, marker=marker) ax.set_xlabel('Užpraitų metų aktyvumas') ax.set_ylabel('Praitų metų aktyvumas') ax.set_zlabel('Šių metų aktyvumas') ax.set_title('Šių metų aktyvumo priklausomybė nuo praėjusių metų') return ax if False: draw_scatterplot(P, T) plt.show() # 7. assert len(P) > 200 Pu = P[:200] Tu = T[:200] # 8. model = LinearRegression().fit(Pu, Tu) # 9. print(f"intercept: {model.intercept_}") print(f"coefficients: {model.coef_}") if False: w1 = model.coef_[0] w2 = model.coef_[1] b = model.intercept_ x1 = max(p[0] for p in P) x2 = max(p[1] for p in P) ax = draw_scatterplot(P, T) ax.plot([0, x1], [0, x2], 'r', zs=[b, x1*w1 + x2*w2 + b]) plt.show() # 10. if False: draw_scatterplot(Pu, model.predict(Pu)) draw_scatterplot(Pu, Tu) plt.legend(handles=[ mpatches.Patch(color='blue', label='Tikrasis'), mpatches.Patch(color='orange', label='Numatytas') ]) plt.show() # 11. if False: e = np.array(Tu) - model.predict(Pu) draw_scatterplot(Pu, e) plt.show() # 12. if False: e = np.array(Tu) - model.predict(Pu) plt.hist(e, bins=40) plt.xlabel("Klaida") plt.ylabel("Dažnis") plt.show() # 13. e = np.array(Tu) - model.predict(Pu) print("MSE (train)", np.sum(e**2) / len(e)) print("MAD (train)", np.median(np.abs(e))) e = np.array(T[200:]) - model.predict(P[200:]) print("MSE (test)", np.sum(e**2) / len(e)) print("MAD (test)", np.median(np.abs(e))) # 14. & 15. & 16. class AdaptiveLinearNeuron(object): def __init__(self, rate = 0.01, max_niter = 10, mse_goal = -1): self.rate = rate self.max_niter = max_niter self.mse_goal = mse_goal def fit(self, X, y): """Fit training data X : Training vectors, X.shape : [#samples, #features] y : Target values, y.shape : [#samples] """ # weights self.weight = np.zeros(1 + X.shape[1]) # Number of misclassifications self.errors = [] # Cost function self.cost = [] for i in range(self.max_niter): output = self.net_input(X) errors = y - output self.weight[1:] += self.rate * X.T.dot(errors) self.weight[0] += self.rate * errors.sum() cost = (errors**2).sum() / 2.0 self.cost.append(cost) mse = np.average(errors**2) if self.mse_goal != -1 and mse < self.mse_goal: break return self def net_input(self, X): """Calculate net input""" return np.dot(X, self.weight[1:]) + self.weight[0] def activation(self, X): """Compute linear activation""" return self.net_input(X) def predict(self, X): """Return class label after unit step""" return np.where(self.activation(X) >= 0.0, 1, -1) # print("---------------") # for lr in [5e-5, 1e-5, 5e-6, 1e-6, 5e-7, 1e-7, 5e-8, 1e-8, 5e-9, 1e-9]: # aln = AdaptiveLinearNeuron(lr, 100000, 300).fit(np.array(Pu), np.array(Tu)) # print(lr, aln.cost[-1], len(aln.cost)) print("---------------") aln = AdaptiveLinearNeuron(0.000001, 1000, 280).fit(np.array(Pu), np.array(Tu)) if False: plt.plot(aln.cost, marker='o') plt.xlabel('Epochs') plt.ylabel('Sum-squared-error') plt.title('Adaptive Linear Neuron - Learning rate 0.000001') plt.show() # 17. print("iterations", len(aln.cost)) print("b", aln.weight[0]) print("w1", aln.weight[1]) print("w2", aln.weight[2]) # 18. # * Taip konverguoja, nes pasiekiama užsibrėžta MSE riba # * b ~ 0.17, w1 ~ -0.58, w2 ~ 1.46 # 19. # Didinti 'lr' nelabai galima, nes tada pradės nekonverguoti ir įvyks slankaus kablelio klaidos # Mažinant 'lr' tiesiog prailgina mokymosi ilgiai tiesiškai # 20. print("------ Didinimas ---------") for n in [2, 4, 6, 8, 10]: print(f"====== n = {n}") P, T = create_dataset(sunspots, n) assert len(P) > 200 Pu = np.array(P[:200]) Tu = np.array(T[:200]) # aln = AdaptiveLinearNeuron(1e-7, 1000).fit(Pu, Tu) # plt.plot(aln.cost, marker='o') # plt.xlabel('Epochs') # plt.ylabel('Sum-squared-error') # plt.title('Adaptive Linear Neuron - Learning rate 0.000001') # plt.show() # print(f"cost (n={n}) ", aln.cost[-1]) # print(f"iterations (n={n}) ", len(aln.cost)) model = LinearRegression().fit(Pu, Tu) e = Tu - model.predict(Pu) print("MSE (train)", np.average(e**2)) print("MAD (train)", np.median(np.abs(e))) e = np.array(T[200:]) - model.predict(P[200:]) print("MSE (test)", np.average(e**2)) print("MAD (test)", np.median(np.abs(e)))