1
0
intelektikos-pagrindai/lab3_1.py
2024-05-13 01:32:51 +03:00

212 lines
5.6 KiB
Python

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from sklearn.linear_model import LinearRegression
import numpy as np
# Warning, this is not great code. Just something hacked together for the report.
def create_dataset(sunspots, n = 2):
    """Build sliding-window samples from the "activity" column.

    Every run of ``n`` consecutive activity values becomes one input vector,
    and the value immediately after that run becomes its target.

    Parameters:
        sunspots: table-like object whose ``"activity"`` entry is a 1-D
            sequence of numbers (the script passes a pandas DataFrame).
        n: window length, i.e. how many preceding values form one input.

    Returns:
        ``(P, T)`` - ``P`` is a list of 1-D NumPy arrays of length ``n`` and
        ``T`` is the list of matching target values. Both lists are empty
        when there are not more than ``n`` rows.
    """
    # Work on a plain array so every access is positional. Indexing the
    # Series directly mixes positional slicing with label-based scalar
    # lookup, which picks the wrong rows whenever the index is not 0..N-1.
    activity = np.asarray(sunspots["activity"])
    P = []
    T = []
    for start in range(len(activity) - n):
        stop = start + n
        # Copy so each sample owns its data instead of viewing the column.
        P.append(activity[start:stop].copy())
        T.append(activity[stop])
    return P, T
# 1. & 2. Load the tab-separated sunspot table. The file has no header row,
# so the two columns are named explicitly.
sunspots = pd.read_csv(
    "sunspot.txt",
    sep="\t",
    header=None,
    names=["year", "activity"],
)

# 4. (disabled) Line plot of the activity over the years.
if False:
    plt.plot(sunspots["year"], sunspots["activity"])
    plt.xlabel("Metai")
    plt.ylabel("Aktyvumas")
    plt.title(f"Saulės aktyvumas tarp {min(sunspots['year'])} ir {max(sunspots['year'])} metų")
    plt.show()

# 5. Turn the series into (previous values -> next value) samples using the
# default window length of create_dataset.
P, T = create_dataset(sunspots)
# 6.
def draw_scatterplot(Xs, Ys, marker = 'o'):
    """Scatter two-feature samples against their values on a 3-D axes.

    Parameters:
        Xs: sequence of samples; element 0 of each sample goes on the x axis
            and element 1 on the y axis.
        Ys: sequence with one value per sample, plotted on the z axis.
        marker: matplotlib marker style passed through to ``scatter``.

    Returns:
        The 3-D axes the points were drawn on, so callers can add to it.
    """
    # The axes is requested before any data is touched, as in the original.
    axes = plt.subplot(projection='3d')

    first_feature = [sample[0] for sample in Xs]
    second_feature = [sample[1] for sample in Xs]
    # Ys is indexed by sample position, so it must be at least as long as Xs.
    heights = [Ys[k] for k in range(len(Xs))]

    axes.scatter(first_feature, second_feature, heights, marker=marker)

    # Axis labels and title are the (Lithuanian) report captions.
    axes.set_xlabel('Užpraitų metų aktyvumas')
    axes.set_ylabel('Praitų metų aktyvumas')
    axes.set_zlabel('Šių metų aktyvumas')
    axes.set_title('Šių metų aktyvumo priklausomybė nuo praėjusių metų')
    return axes
# (disabled) Show the raw samples as a 3-D scatter plot.
if False:
    draw_scatterplot(P, T)
    plt.show()

# 7. The first 200 samples are the training set; everything after them is
# held out for testing, so there must be more than 200 samples in total.
assert len(P) > 200
Pu, Tu = P[:200], T[:200]

# 8. Least-squares linear model fitted on the training set only.
model = LinearRegression()
model.fit(Pu, Tu)

# 9. Report the fitted parameters.
print(f"intercept: {model.intercept_}")
print(f"coefficients: {model.coef_}")

# (disabled) Draw a red line on the fitted plane, from the point above the
# origin to the point above the largest value of each feature.
if False:
    w1, w2 = model.coef_[0], model.coef_[1]
    b = model.intercept_
    x1 = max(p[0] for p in P)
    x2 = max(p[1] for p in P)
    ax = draw_scatterplot(P, T)
    ax.plot([0, x1], [0, x2], 'r', zs=[b, x1*w1 + x2*w2 + b])
    plt.show()
# 10. (disabled) Overlay the model's predictions on the real training targets.
if False:
    # Draw the real targets FIRST. Both calls are expected to land on the same
    # 3-D axes (pyplot.subplot returns the existing axes when called again
    # with the same arguments), so the first scatter takes the first default
    # colour (blue) and the second takes the next one (orange). The legend
    # below labels blue as 'Tikrasis' (actual) and orange as 'Numatytas'
    # (predicted); drawing the predictions first made the legend mislabel
    # the two point clouds.
    draw_scatterplot(Pu, Tu)
    draw_scatterplot(Pu, model.predict(Pu))
    plt.legend(handles=[
        mpatches.Patch(color='blue', label='Tikrasis'),
        mpatches.Patch(color='orange', label='Numatytas')
    ])
    plt.show()
# 11. (disabled) 3-D scatter of the training residuals (actual - predicted).
if False:
    e = np.asarray(Tu) - model.predict(Pu)
    draw_scatterplot(Pu, e)
    plt.show()

# 12. (disabled) Histogram of the training residuals.
if False:
    e = np.asarray(Tu) - model.predict(Pu)
    plt.hist(e, bins=40)
    plt.xlabel("Klaida")
    plt.ylabel("Dažnis")
    plt.show()

# 13. Error metrics: mean squared error and median absolute error, first on
# the training samples and then on the samples after the first 200.
e = np.asarray(Tu) - model.predict(Pu)
print("MSE (train)", np.square(e).sum() / len(e))
print("MAD (train)", np.median(np.absolute(e)))

e = np.asarray(T[200:]) - model.predict(P[200:])
print("MSE (test)", np.square(e).sum() / len(e))
print("MAD (test)", np.median(np.absolute(e)))
# 14. & 15. & 16.
class AdaptiveLinearNeuron(object):
    """Single linear neuron trained with full-batch gradient descent.

    Parameters:
        rate: learning rate applied to every weight update.
        max_niter: maximum number of passes (epochs) over the training data.
        mse_goal: stop early once the mean squared error of an epoch drops
            below this value; ``-1`` (the default) disables early stopping.

    Attributes set by ``fit``:
        weight: 1-D array; ``weight[0]`` is the bias and ``weight[1:]`` holds
            one weight per feature.
        cost: list with half the sum of squared errors for each epoch run.
        errors: always an empty list; nothing in this class fills it. Kept
            only so existing code that reads the attribute keeps working.
    """

    def __init__(self, rate = 0.01, max_niter = 10, mse_goal = -1):
        self.rate = rate
        self.max_niter = max_niter
        self.mse_goal = mse_goal

    def fit(self, X, y):
        """Fit the weights to the training data and return ``self``.

        X : training vectors, array-like of shape [#samples, #features]
        y : target values, array-like of shape [#samples]
        """
        # Accept plain (nested) lists as well as arrays: the code below needs
        # ``X.shape`` and ``X.T``, which lists do not provide.
        X = np.asarray(X)
        y = np.asarray(y)

        # Bias followed by one weight per feature, all starting at zero.
        self.weight = np.zeros(1 + X.shape[1])
        # Never populated (see class docstring).
        self.errors = []
        # Half sum-of-squared-errors per epoch.
        self.cost = []

        for _ in range(self.max_niter):
            errors = y - self.net_input(X)
            # Gradient step on the feature weights and on the bias.
            self.weight[1:] += self.rate * X.T.dot(errors)
            self.weight[0] += self.rate * errors.sum()
            self.cost.append((errors**2).sum() / 2.0)
            # The stopping test uses the errors measured BEFORE this epoch's
            # weight update.
            mse = np.average(errors**2)
            if self.mse_goal != -1 and mse < self.mse_goal:
                break
        return self

    def net_input(self, X):
        """Return the weighted sum of the inputs plus the bias."""
        return np.dot(X, self.weight[1:]) + self.weight[0]

    def activation(self, X):
        """Linear (identity) activation: the net input is returned unchanged."""
        return self.net_input(X)

    def predict(self, X):
        """Return class labels: 1 where the activation is >= 0, otherwise -1.

        This thresholds the output; use ``net_input`` / ``activation`` to get
        the continuous value instead.
        """
        return np.where(self.activation(X) >= 0.0, 1, -1)
# Learning-rate sweep used while choosing the rate below (kept for reference):
# print("---------------")
# for lr in [5e-5, 1e-5, 5e-6, 1e-6, 5e-7, 1e-7, 5e-8, 1e-8, 5e-9, 1e-9]:
#     aln = AdaptiveLinearNeuron(lr, 100000, 300).fit(np.array(Pu), np.array(Tu))
#     print(lr, aln.cost[-1], len(aln.cost))
print("---------------")

# Train the neuron on the training set: at most 1000 epochs, stopping early
# once the epoch MSE falls below 280.
aln = AdaptiveLinearNeuron(rate=0.000001, max_niter=1000, mse_goal=280)
aln.fit(np.array(Pu), np.array(Tu))

# (disabled) Learning curve: cost per epoch.
if False:
    plt.plot(aln.cost, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Sum-squared-error')
    plt.title('Adaptive Linear Neuron - Learning rate 0.000001')
    plt.show()

# 17. Number of epochs actually run, then the learned bias and weights.
print("iterations", len(aln.cost))
print("b", aln.weight[0])
print("w1", aln.weight[1])
print("w2", aln.weight[2])

# 18.
# * Yes, it converges, because the chosen MSE threshold is reached.
# * b ~ 0.17, w1 ~ -0.58, w2 ~ 1.46
# 19.
# Increasing 'lr' is not really possible: training then stops converging and
# floating-point errors occur.
# Decreasing 'lr' simply lengthens the training time linearly.
# 20. Repeat the least-squares experiment for increasing window lengths n.
print("------ Didinimas ---------")
for n in (2, 4, 6, 8, 10):
    print(f"====== n = {n}")

    # Rebuild the samples for this window length; the first 200 are the
    # training set and the rest are the test set.
    P, T = create_dataset(sunspots, n)
    assert len(P) > 200
    Pu, Tu = np.array(P[:200]), np.array(T[:200])

    # ADALINE variant of the same experiment (kept for reference):
    # aln = AdaptiveLinearNeuron(1e-7, 1000).fit(Pu, Tu)
    # plt.plot(aln.cost, marker='o')
    # plt.xlabel('Epochs')
    # plt.ylabel('Sum-squared-error')
    # plt.title('Adaptive Linear Neuron - Learning rate 0.000001')
    # plt.show()
    # print(f"cost (n={n}) ", aln.cost[-1])
    # print(f"iterations (n={n}) ", len(aln.cost))

    model = LinearRegression()
    model.fit(Pu, Tu)

    # Mean squared error and median absolute error on the training samples...
    e = Tu - model.predict(Pu)
    print("MSE (train)", np.mean(np.square(e)))
    print("MAD (train)", np.median(np.absolute(e)))

    # ...and on the held-out samples.
    e = np.array(T[200:]) - model.predict(P[200:])
    print("MSE (test)", np.mean(np.square(e)))
    print("MAD (test)", np.median(np.absolute(e)))