{
"cells": [
{
"cell_type": "markdown",
"id": "c123b4de-9116-4150-8de9-8b171666d8b9",
"metadata": {},
"source": [
"# Vienasluoksniai tinklai - tiesinės regresijos uždavinys\n",
"\n",
"Tiesinės lygties pritaikymas duotam duomenų rinkiniui n-matėje erdvėje vadinamas tiesine regresija. Toliau pateiktame paveikslėlyje parodytas tiesinės regresijos pavyzdys. Paprastai tariant, bandoma rasti geriausias $w$ ir $b$ parametrų reikšmes, kurios geriausiai atitiktų duomenų rinkinį. Tuomet, gavę geriausią įmanomą įvertį, galime prognozuoti $y$ reikšmes, turėdami $x$.\n",
"\n",
"![Tiesinė regresija](linear-regression.gif)"
]
},
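{
"cell_type": "markdown",
"id": "model-equations-md-0001",
"metadata": {},
"source": [
"In the notation used below, the model and its mean-squared-error loss are the standard linear-regression ones:\n",
"\n",
"$$\\hat{y} = Xw + b, \\qquad L(w, b) = \\frac{1}{M}\\sum_{i=1}^{M}(\\hat{y}_i - y_i)^2$$\n",
"\n",
"where $X$ is the $M \\times n$ data matrix, $w$ the weight vector and $b$ the bias; gradient descent will nudge $w$ and $b$ in the direction that decreases $L$."
]
},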
{
"cell_type": "code",
"execution_count": 1,
"id": "4171cf08-31ca-4608-b187-f2ac40a6009b",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.pylab import rcParams\n",
"from sklearn.datasets import make_regression\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import r2_score \n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "73eef68e-fb6c-4129-b87c-c3e268a7de76",
"metadata": {},
"outputs": [],
"source": [
"def plot_graph(X, y, pred_line=None, losses=None):\n",
" \n",
" plots = 2 if losses!=None else 1\n",
" \n",
" fig = plt.figure(figsize=(8 * plots, 6))\n",
" \n",
" ax1 = fig.add_subplot(1, plots, 1)\n",
" ax1.scatter(X, y, alpha=0.8) # Plot the original set of datapoints\n",
" \n",
" if(pred_line != None):\n",
" x_line, y_line = pred_line['x_line'], pred_line['y_line']\n",
" ax1.plot(x_line, y_line, linewidth=2, markersize=12, color='red', alpha=0.8) # Plot the randomly generated line\n",
" ax1.set_title('Predicted Line on set of Datapoints')\n",
" else:\n",
" ax1.set_title('Plot of Datapoints generated')\n",
" \n",
" ax1.set_xlabel('x')\n",
" ax1.set_ylabel('y')\n",
" \n",
" if(losses!=None):\n",
" ax2 = fig.add_subplot(1, plots, 2)\n",
" ax2.plot(np.arange(len(losses)), losses, marker='o')\n",
" \n",
" ax2.set_xlabel('Epoch')\n",
" ax2.set_ylabel('Loss')\n",
" ax2.set_title('Loss')\n",
"\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "64294a9a-2018-4a37-8dbf-2b7a242fa72c",
"metadata": {},
"outputs": [],
"source": [
"def plot_pred_line(X, y, w, b,losses=None):\n",
" # Generate a set of datapoints on x for creating a line.\n",
" # We shall consider the range of X_train for generating the line so that the line superposes the datapoints.\n",
" x_line = np.linspace(np.min(X), np.max(X), 10) \n",
" \n",
" # Calculate the corresponding y with the parameter values of m & b\n",
" y_line = w * x_line + b \n",
" \n",
" plot_graph(X, y, pred_line={'x_line': x_line, 'y_line':y_line}, losses=losses)\n",
" \n",
" return "
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ac81d49e-6e39-4dd1-97cc-49d850877744",
"metadata": {},
"outputs": [],
"source": [
"def forward_prop(X, w, b):\n",
" #y_pred = w * X + b\n",
" y_pred = np.reshape(np.sum(w*X,1),(X.shape[0],1)) + b\n",
" return y_pred"
]
},
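{
"cell_type": "markdown",
"id": "fwd-demo-md-0001",
"metadata": {},
"source": [
"A quick shape sanity check for `forward_prop` (the numbers below are illustrative only): given an $M \\times n$ input it returns an $M \\times 1$ column of predictions."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fwd-demo-code-0001",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative example: 3 samples, 1 feature\n",
"X_demo = np.array([[1.0], [2.0], [3.0]])\n",
"w_demo = np.array([2.0])  # slope\n",
"b_demo = 0.5              # intercept\n",
"\n",
"forward_prop(X_demo, w_demo, b_demo)  # expected: [[2.5], [4.5], [6.5]]"
]
},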
{
"cell_type": "code",
"execution_count": 5,
"id": "3f35616b-0a5c-41be-811b-6f15d436a10c",
"metadata": {},
"outputs": [],
"source": [
"def compute_loss(y, y_pred):\n",
" loss = np.mean((y_pred - y)**2)\n",
" \n",
" return loss"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "06a40434-da8f-4176-96d5-ab162bc843e5",
"metadata": {},
"outputs": [],
"source": [
"def grad_desc(w, b, X_train, y_train, y_pred):\n",
" dw = np.mean(2*(y_pred - y_train) * X_train)\n",
" db = np.mean(2*(y_pred - y_train))\n",
" \n",
" return dw, db"
]
},
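{
"cell_type": "markdown",
"id": "grad-derivation-md-0001",
"metadata": {},
"source": [
"For reference, the quantities computed by `grad_desc` follow directly from differentiating the MSE loss:\n",
"\n",
"$$\\frac{\\partial L}{\\partial w} = \\frac{1}{M}\\sum_{i=1}^{M} 2(\\hat{y}_i - y_i)\\,x_i, \\qquad \\frac{\\partial L}{\\partial b} = \\frac{1}{M}\\sum_{i=1}^{M} 2(\\hat{y}_i - y_i)$$\n",
"\n",
"i.e. a per-feature mean for $w$ and a plain mean for $b$, which is exactly what the two `np.mean` calls compute."
]
},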
{
"cell_type": "code",
"execution_count": 7,
"id": "31386096-b418-4041-83f6-d689a87da77d",
"metadata": {},
"outputs": [],
"source": [
"def back_prop(X_train, y_train, y_pred, w, b, l_r):\n",
" dw, db = grad_desc(w, b, X_train, y_train, y_pred)\n",
" \n",
" w -= l_r * dw\n",
" b -= l_r * db\n",
" \n",
" return w, b"
]
},
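{
"cell_type": "markdown",
"id": "update-rule-md-0001",
"metadata": {},
"source": [
"`back_prop` then applies the standard gradient-descent update with learning rate $\\eta$ (`l_r` in the code):\n",
"\n",
"$$w \\leftarrow w - \\eta\\,\\frac{\\partial L}{\\partial w}, \\qquad b \\leftarrow b - \\eta\\,\\frac{\\partial L}{\\partial b}$$"
]
},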
{
"cell_type": "code",
"execution_count": 1,
"id": "8fe4faa0-96c3-40f9-b025-6f81002ba483",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'make_regression' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 13\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;66;03m# Number of iterations for updates - Define during explanation\u001b[39;00m\n\u001b[1;32m 11\u001b[0m epochs \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m300\u001b[39m\n\u001b[0;32m---> 13\u001b[0m X, y \u001b[38;5;241m=\u001b[39m \u001b[43mmake_regression\u001b[49m(n_samples\u001b[38;5;241m=\u001b[39mM, n_features\u001b[38;5;241m=\u001b[39mn, n_informative\u001b[38;5;241m=\u001b[39mn, n_targets\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m42\u001b[39m, noise\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m)\n\u001b[1;32m 14\u001b[0m y \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mreshape(y,(y\u001b[38;5;241m.\u001b[39msize, \u001b[38;5;241m1\u001b[39m))\n\u001b[1;32m 16\u001b[0m m \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mnormal(scale\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m)\n",
"\u001b[0;31mNameError\u001b[0m: name 'make_regression' is not defined"
]
}
],
"source": [
"# Sample size\n",
"M = 200\n",
"\n",
"# No. of input features\n",
"n = 1\n",
"\n",
"# Learning Rate - Define during explanation\n",
"l_r = 0.01\n",
"\n",
"# Number of iterations for updates - Define during explanation\n",
"epochs = 300\n",
"\n",
"X, y = make_regression(n_samples=M, n_features=n, n_informative=n, n_targets=1, random_state=42, noise=10)\n",
"y = np.reshape(y,(y.size, 1))\n",
"\n",
"m = np.random.normal(scale=10)\n",
"b = np.random.normal(scale=10)\n",
"w = np.random.normal(scale=10, size=(X.shape[1],)) \n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
"\n",
"losses = []\n",
"\n",
"for i in range(epochs):\n",
" y_pred = forward_prop(X_train, w, b)\n",
" \n",
" #print(y_pred)\n",
" \n",
" loss = compute_loss(y_train, y_pred)\n",
" losses.append(loss)\n",
"\n",
" m, b = back_prop(X_train, y_train, y_pred, w, b, l_r)\n",
"\n",
" if(i%10==0):\n",
" print('Epoch: ', i)\n",
" print('Loss = ', loss)\n",
" plot_pred_line(X_train, y_train, w, b, losses)\n",
"\n",
"del losses[:]"
]
},
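{
"cell_type": "markdown",
"id": "lstsq-check-md-0001",
"metadata": {},
"source": [
"As an optional sanity check (not part of the lab's method), the parameters learned by gradient descent can be compared against the closed-form least-squares solution, obtained here with `np.linalg.lstsq` on the same training data; after the 300 epochs above the two estimates should roughly agree."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "lstsq-check-code-0001",
"metadata": {},
"outputs": [],
"source": [
"# Closed-form least squares on the augmented matrix [X | 1] as an independent reference\n",
"X_aug = np.hstack([X_train, np.ones((X_train.shape[0], 1))])\n",
"theta, *_ = np.linalg.lstsq(X_aug, y_train, rcond=None)\n",
"\n",
"print('lstsq w =', theta[:-1].ravel(), ' b =', theta[-1, 0])\n",
"print('GD    w =', w, ' b =', b)"
]
},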
{
"cell_type": "code",
"execution_count": 9,
"id": "b7966eca-eba4-420d-90e3-4ffdda5b44cb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prediction: \n",
"Loss = 111.2313597749226\n",
"R2 = 0.9746%\n",
"\n",
"w = [87.50631143]\n",
"b = 2.1422612255336815\n"
]
}
],
"source": [
"print('Prediction: ')\n",
"y_pred = forward_prop(X_test, w, b)\n",
"loss = compute_loss(y_test, y_pred)\n",
"#print(np.hstack([y_test,y_pred]))\n",
"print('Loss = ', loss)\n",
"r2 = r2_score(y_pred, y_test)\n",
"print('R2 = {}%'.format(round(r2, 4)))\n",
"\n",
"print('\\nw = ', w)\n",
"print('b = ', b)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}