# Vienasluoksniai tinklai - tiesinės regresijos uždavinys

Tiesinės lygties pritaikymas duotam duomenų rinkiniui n-matėje erdvėje vadinamas tiesine regresija. Toliau pateiktame paveikslėlyje parodytas tiesinės regresijos pavyzdys. Paprastai tariant, bandoma rasti geriausias $w$ ir $b$ parametrų reikšmes, kurios geriausiai atitiktų duomenų rinkinį. Tuomet, gavę geriausią įmanomą įvertį, galime prognozuoti $y$ reikšmes, turėdami $x$.

![Tiesinė regresija](linear-regression.gif)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score 

%matplotlib inline

In [2]:
def plot_graph(X, y, pred_line=None, losses=None):
 
 plots = 2 if losses!=None else 1
 
 fig = plt.figure(figsize=(8 * plots, 6))
 
 ax1 = fig.add_subplot(1, plots, 1)
 ax1.scatter(X, y, alpha=0.8) # Plot the original set of datapoints
 
 if(pred_line != None):
 x_line, y_line = pred_line['x_line'], pred_line['y_line']
 ax1.plot(x_line, y_line, linewidth=2, markersize=12, color='red', alpha=0.8) # Plot the randomly generated line
 ax1.set_title('Predicted Line on set of Datapoints')
 else:
 ax1.set_title('Plot of Datapoints generated')
 
 ax1.set_xlabel('x')
 ax1.set_ylabel('y')
 
 if(losses!=None):
 ax2 = fig.add_subplot(1, plots, 2)
 ax2.plot(np.arange(len(losses)), losses, marker='o')
 
 ax2.set_xlabel('Epoch')
 ax2.set_ylabel('Loss')
 ax2.set_title('Loss')

 plt.show()

In [3]:
def plot_pred_line(X, y, w, b,losses=None):
 # Generate a set of datapoints on x for creating a line.
 # We shall consider the range of X_train for generating the line so that the line superposes the datapoints.
 x_line = np.linspace(np.min(X), np.max(X), 10) 
 
 # Calculate the corresponding y with the parameter values of m & b
 y_line = w * x_line + b 
 
 plot_graph(X, y, pred_line={'x_line': x_line, 'y_line':y_line}, losses=losses)
 
 return 

In [4]:
def forward_prop(X, w, b):
 #y_pred = w * X + b
 y_pred = np.reshape(np.sum(w*X,1),(X.shape[0],1)) + b
 return y_pred

In [5]:
def compute_loss(y, y_pred):
 loss = np.mean((y_pred - y)**2)
 
 return loss

In [6]:
def grad_desc(w, b, X_train, y_train, y_pred):
 dw = np.mean(2*(y_pred - y_train) * X_train)
 db = np.mean(2*(y_pred - y_train))
 
 return dw, db

In [7]:
def back_prop(X_train, y_train, y_pred, w, b, l_r):
 dw, db = grad_desc(w, b, X_train, y_train, y_pred)
 
 w -= l_r * dw
 b -= l_r * db
 
 return w, b

In [1]:
# Sample size
M = 200

# No. of input features
n = 1

# Learning Rate - Define during explanation
l_r = 0.01

# Number of iterations for updates - Define during explanation
epochs = 300

X, y = make_regression(n_samples=M, n_features=n, n_informative=n, n_targets=1, random_state=42, noise=10)
y = np.reshape(y,(y.size, 1))

m = np.random.normal(scale=10)
b = np.random.normal(scale=10)
w = np.random.normal(scale=10, size=(X.shape[1],)) 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

losses = []

for i in range(epochs):
 y_pred = forward_prop(X_train, w, b)
 
 #print(y_pred)
 
 loss = compute_loss(y_train, y_pred)
 losses.append(loss)

 m, b = back_prop(X_train, y_train, y_pred, w, b, l_r)

 if(i%10==0):
 print('Epoch: ', i)
 print('Loss = ', loss)
 plot_pred_line(X_train, y_train, w, b, losses)

del losses[:]

NameError: name 'make_regression' is not defined

In [9]:
print('Prediction: ')
y_pred = forward_prop(X_test, w, b)
loss = compute_loss(y_test, y_pred)
#print(np.hstack([y_test,y_pred]))
print('Loss = ', loss)
r2 = r2_score(y_pred, y_test)
print('R2 = {}%'.format(round(r2, 4)))

print('\nw = ', w)
print('b = ', b)


Prediction: 
Loss = 111.2313597749226
R2 = 0.9746%

w = [87.50631143]
b = 2.1422612255336815
