212 lines
5.6 KiB
Python
212 lines
5.6 KiB
Python
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.patches as mpatches
|
|
from sklearn.linear_model import LinearRegression
|
|
import numpy as np
|
|
|
|
# Warning, this is not great code. Just something hacked together for the report.
|
|
|
|
def create_dataset(sunspots, n = 2):
|
|
P = []
|
|
T = []
|
|
for i in range(len(sunspots) - n):
|
|
P.append(np.array(sunspots["activity"][i:(i+n)]))
|
|
T.append(sunspots["activity"][i+n])
|
|
return P, T
|
|
|
|
# 1. & 2.
|
|
sunspots = pd.read_csv("sunspot.txt", delimiter='\t', header=None, names=["year", "activity"])
|
|
|
|
# 4.
|
|
if False:
|
|
plt.plot(sunspots['year'], sunspots["activity"])
|
|
plt.xlabel("Metai")
|
|
plt.ylabel("Aktyvumas")
|
|
plt.title(f"Saulės aktyvumas tarp {min(sunspots['year'])} ir {max(sunspots['year'])} metų")
|
|
plt.show()
|
|
|
|
# 5.
|
|
P, T = create_dataset(sunspots)
|
|
|
|
# 6.
|
|
def draw_scatterplot(Xs, Ys, marker = 'o'):
|
|
ax = plt.subplot(projection='3d')
|
|
X_coords = []
|
|
Y_coords = []
|
|
Z_coords = []
|
|
for i in range(len(Xs)):
|
|
X_coords.append(Xs[i][0])
|
|
Y_coords.append(Xs[i][1])
|
|
Z_coords.append(Ys[i])
|
|
|
|
ax.scatter(X_coords, Y_coords, Z_coords, marker=marker)
|
|
ax.set_xlabel('Užpraitų metų aktyvumas')
|
|
ax.set_ylabel('Praitų metų aktyvumas')
|
|
ax.set_zlabel('Šių metų aktyvumas')
|
|
ax.set_title('Šių metų aktyvumo priklausomybė nuo praėjusių metų')
|
|
|
|
return ax
|
|
|
|
if False:
|
|
draw_scatterplot(P, T)
|
|
plt.show()
|
|
|
|
# 7.
|
|
assert len(P) > 200
|
|
Pu = P[:200]
|
|
Tu = T[:200]
|
|
|
|
# 8.
|
|
model = LinearRegression().fit(Pu, Tu)
|
|
|
|
# 9.
|
|
print(f"intercept: {model.intercept_}")
|
|
print(f"coefficients: {model.coef_}")
|
|
|
|
if False:
|
|
w1 = model.coef_[0]
|
|
w2 = model.coef_[1]
|
|
b = model.intercept_
|
|
|
|
x1 = max(p[0] for p in P)
|
|
x2 = max(p[1] for p in P)
|
|
ax = draw_scatterplot(P, T)
|
|
ax.plot([0, x1], [0, x2], 'r', zs=[b, x1*w1 + x2*w2 + b])
|
|
plt.show()
|
|
|
|
# 10.
|
|
if False:
|
|
draw_scatterplot(Pu, model.predict(Pu))
|
|
draw_scatterplot(Pu, Tu)
|
|
|
|
plt.legend(handles=[
|
|
mpatches.Patch(color='blue', label='Tikrasis'),
|
|
mpatches.Patch(color='orange', label='Numatytas')
|
|
])
|
|
plt.show()
|
|
|
|
# 11.
|
|
if False:
|
|
e = np.array(Tu) - model.predict(Pu)
|
|
draw_scatterplot(Pu, e)
|
|
plt.show()
|
|
|
|
# 12.
|
|
if False:
|
|
e = np.array(Tu) - model.predict(Pu)
|
|
plt.hist(e, bins=40)
|
|
plt.xlabel("Klaida")
|
|
plt.ylabel("Dažnis")
|
|
plt.show()
|
|
|
|
# 13.
|
|
e = np.array(Tu) - model.predict(Pu)
|
|
print("MSE (train)", np.sum(e**2) / len(e))
|
|
print("MAD (train)", np.median(np.abs(e)))
|
|
|
|
e = np.array(T[200:]) - model.predict(P[200:])
|
|
print("MSE (test)", np.sum(e**2) / len(e))
|
|
print("MAD (test)", np.median(np.abs(e)))
|
|
|
|
# 14. & 15. & 16.
|
|
class AdaptiveLinearNeuron(object):
|
|
def __init__(self, rate = 0.01, max_niter = 10, mse_goal = -1):
|
|
self.rate = rate
|
|
self.max_niter = max_niter
|
|
self.mse_goal = mse_goal
|
|
|
|
def fit(self, X, y):
|
|
"""Fit training data
|
|
X : Training vectors, X.shape : [#samples, #features]
|
|
y : Target values, y.shape : [#samples]
|
|
"""
|
|
|
|
# weights
|
|
self.weight = np.zeros(1 + X.shape[1])
|
|
|
|
# Number of misclassifications
|
|
self.errors = []
|
|
|
|
# Cost function
|
|
self.cost = []
|
|
|
|
for i in range(self.max_niter):
|
|
output = self.net_input(X)
|
|
errors = y - output
|
|
self.weight[1:] += self.rate * X.T.dot(errors)
|
|
self.weight[0] += self.rate * errors.sum()
|
|
cost = (errors**2).sum() / 2.0
|
|
self.cost.append(cost)
|
|
|
|
mse = np.average(errors**2)
|
|
if self.mse_goal != -1 and mse < self.mse_goal: break
|
|
return self
|
|
|
|
def net_input(self, X):
|
|
"""Calculate net input"""
|
|
return np.dot(X, self.weight[1:]) + self.weight[0]
|
|
|
|
def activation(self, X):
|
|
"""Compute linear activation"""
|
|
return self.net_input(X)
|
|
|
|
def predict(self, X):
|
|
"""Return class label after unit step"""
|
|
return np.where(self.activation(X) >= 0.0, 1, -1)
|
|
|
|
|
|
# print("---------------")
|
|
# for lr in [5e-5, 1e-5, 5e-6, 1e-6, 5e-7, 1e-7, 5e-8, 1e-8, 5e-9, 1e-9]:
|
|
# aln = AdaptiveLinearNeuron(lr, 100000, 300).fit(np.array(Pu), np.array(Tu))
|
|
|
|
# print(lr, aln.cost[-1], len(aln.cost))
|
|
|
|
print("---------------")
|
|
aln = AdaptiveLinearNeuron(0.000001, 1000, 280).fit(np.array(Pu), np.array(Tu))
|
|
if False:
|
|
plt.plot(aln.cost, marker='o')
|
|
plt.xlabel('Epochs')
|
|
plt.ylabel('Sum-squared-error')
|
|
plt.title('Adaptive Linear Neuron - Learning rate 0.000001')
|
|
plt.show()
|
|
|
|
# 17.
|
|
print("iterations", len(aln.cost))
|
|
print("b", aln.weight[0])
|
|
print("w1", aln.weight[1])
|
|
print("w2", aln.weight[2])
|
|
|
|
# 18.
|
|
# * Taip konverguoja, nes pasiekiama užsibrėžta MSE riba
|
|
# * b ~ 0.17, w1 ~ -0.58, w2 ~ 1.46
|
|
|
|
# 19.
|
|
# Didinti 'lr' nelabai galima, nes tada pradės nekonverguoti ir įvyks slankaus kablelio klaidos
|
|
# Mažinant 'lr' tiesiog prailgina mokymosi ilgiai tiesiškai
|
|
|
|
# 20.
|
|
print("------ Didinimas ---------")
|
|
for n in [2, 4, 6, 8, 10]:
|
|
print(f"====== n = {n}")
|
|
P, T = create_dataset(sunspots, n)
|
|
assert len(P) > 200
|
|
Pu = np.array(P[:200])
|
|
Tu = np.array(T[:200])
|
|
|
|
# aln = AdaptiveLinearNeuron(1e-7, 1000).fit(Pu, Tu)
|
|
# plt.plot(aln.cost, marker='o')
|
|
# plt.xlabel('Epochs')
|
|
# plt.ylabel('Sum-squared-error')
|
|
# plt.title('Adaptive Linear Neuron - Learning rate 0.000001')
|
|
# plt.show()
|
|
# print(f"cost (n={n}) ", aln.cost[-1])
|
|
# print(f"iterations (n={n}) ", len(aln.cost))
|
|
|
|
model = LinearRegression().fit(Pu, Tu)
|
|
e = Tu - model.predict(Pu)
|
|
print("MSE (train)", np.average(e**2))
|
|
print("MAD (train)", np.median(np.abs(e)))
|
|
|
|
e = np.array(T[200:]) - model.predict(P[200:])
|
|
print("MSE (test)", np.average(e**2))
|
|
print("MAD (test)", np.median(np.abs(e))) |