261 lines
8.6 KiB
Plaintext
261 lines
8.6 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c123b4de-9116-4150-8de9-8b171666d8b9",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Vienasluoksniai tinklai - tiesinės regresijos uždavinys\n",
|
|
"\n",
|
|
"Tiesinės lygties pritaikymas duotam duomenų rinkiniui n-matėje erdvėje vadinamas tiesine regresija. Toliau pateiktame paveikslėlyje parodytas tiesinės regresijos pavyzdys. Paprastai tariant, bandoma rasti geriausias $w$ ir $b$ parametrų reikšmes, kurios geriausiai atitiktų duomenų rinkinį. Tuomet, gavę geriausią įmanomą įvertį, galime prognozuoti $y$ reikšmes, turėdami $x$.\n",
|
|
"\n",
|
|
""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "4171cf08-31ca-4608-b187-f2ac40a6009b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from matplotlib.pylab import rcParams\n",
|
|
"from sklearn.datasets import make_regression\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.metrics import r2_score \n",
|
|
"\n",
|
|
"%matplotlib inline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "73eef68e-fb6c-4129-b87c-c3e268a7de76",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def plot_graph(X, y, pred_line=None, losses=None):\n",
"    \"\"\"Scatter-plot the data points, optionally with a fitted line and a loss curve.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X, y : array-like\n",
"        Coordinates passed straight to ``Axes.scatter``.\n",
"    pred_line : dict, optional\n",
"        Mapping with the keys ``'x_line'`` and ``'y_line'``; when given, that\n",
"        line is drawn in red on top of the scatter plot.\n",
"    losses : sequence, optional\n",
"        Loss values; when given, a second panel plots them against their index\n",
"        (labelled 'Epoch').\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is displayed with ``plt.show()``.\n",
"    \"\"\"\n",
"    # Identity checks instead of `!= None`: that comparison is evaluated\n",
"    # element-wise for NumPy arrays and then has no single truth value.\n",
"    show_line = pred_line is not None\n",
"    show_losses = losses is not None\n",
"    plots = 2 if show_losses else 1\n",
"\n",
"    fig = plt.figure(figsize=(8 * plots, 6))\n",
"\n",
"    ax1 = fig.add_subplot(1, plots, 1)\n",
"    ax1.scatter(X, y, alpha=0.8)  # the original set of data points\n",
"\n",
"    if show_line:\n",
"        x_line, y_line = pred_line['x_line'], pred_line['y_line']\n",
"        ax1.plot(x_line, y_line, linewidth=2, markersize=12, color='red', alpha=0.8)\n",
"        ax1.set_title('Predicted Line on set of Datapoints')\n",
"    else:\n",
"        ax1.set_title('Plot of Datapoints generated')\n",
"\n",
"    ax1.set_xlabel('x')\n",
"    ax1.set_ylabel('y')\n",
"\n",
"    if show_losses:\n",
"        ax2 = fig.add_subplot(1, plots, 2)\n",
"        ax2.plot(np.arange(len(losses)), losses, marker='o')\n",
"\n",
"        ax2.set_xlabel('Epoch')\n",
"        ax2.set_ylabel('Loss')\n",
"        ax2.set_title('Loss')\n",
"\n",
"    plt.show()"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "64294a9a-2018-4a37-8dbf-2b7a242fa72c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def plot_pred_line(X, y, w, b, losses=None):\n",
"    \"\"\"Draw the line ``w * x + b`` over the data points via ``plot_graph``.\n",
"\n",
"    The line is evaluated at 10 evenly spaced x values between the minimum\n",
"    and the maximum of ``X``, so that it spans the plotted data. ``losses``\n",
"    is forwarded to ``plot_graph`` unchanged.\n",
"    \"\"\"\n",
"    lowest, highest = np.min(X), np.max(X)\n",
"    x_line = np.linspace(lowest, highest, 10)\n",
"\n",
"    # y values of the line for the current parameters w and b\n",
"    line = {'x_line': x_line, 'y_line': w * x_line + b}\n",
"\n",
"    plot_graph(X, y, pred_line=line, losses=losses)"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "ac81d49e-6e39-4dd1-97cc-49d850877744",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def forward_prop(X, w, b):\n",
"    \"\"\"Return the linear prediction for every row of ``X`` as a column vector.\n",
"\n",
"    Each row is multiplied element-wise by ``w`` and summed over axis 1; the\n",
"    result is reshaped to ``(X.shape[0], 1)`` and ``b`` is added.\n",
"    \"\"\"\n",
"    weighted_sum = (w * X).sum(axis=1)\n",
"    return weighted_sum.reshape(X.shape[0], 1) + b"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "3f35616b-0a5c-41be-811b-6f15d436a10c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def compute_loss(y, y_pred):\n",
"    \"\"\"Return the mean of the squared differences between ``y_pred`` and ``y``.\"\"\"\n",
"    residual = y_pred - y\n",
"    return np.mean(np.square(residual))"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "06a40434-da8f-4176-96d5-ab162bc843e5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def grad_desc(w, b, X_train, y_train, y_pred):\n",
"    \"\"\"Return the gradients of the mean squared error with respect to w and b.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    w, b\n",
"        Current parameters; not read here, kept for the existing call signature.\n",
"    X_train : ndarray\n",
"        Inputs; assumed to be of shape (samples, features) - TODO confirm.\n",
"    y_train, y_pred : ndarray\n",
"        Targets and predictions; assumed to be column vectors of shape\n",
"        (samples, 1) so that they broadcast against ``X_train`` - TODO confirm.\n",
"\n",
"    Returns\n",
"    -------\n",
"    dw : ndarray\n",
"        One gradient entry per column of ``X_train``.\n",
"    db : float\n",
"        Gradient for the bias.\n",
"    \"\"\"\n",
"    residual = y_pred - y_train\n",
"\n",
"    # Average over the samples only (axis 0). Without the axis argument the\n",
"    # mean would also run across the feature columns and collapse the\n",
"    # per-feature gradients into one scalar.\n",
"    dw = np.mean(2 * residual * X_train, axis=0)\n",
"    db = np.mean(2 * residual)\n",
"\n",
"    return dw, db"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "31386096-b418-4041-83f6-d689a87da77d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def back_prop(X_train, y_train, y_pred, w, b, l_r):\n",
"    \"\"\"Apply one gradient-descent step to ``w`` and ``b`` and return both.\n",
"\n",
"    The gradients come from ``grad_desc`` and are scaled by the learning\n",
"    rate ``l_r`` before being subtracted from the parameters.\n",
"    \"\"\"\n",
"    grad_w, grad_b = grad_desc(w, b, X_train, y_train, y_pred)\n",
"\n",
"    # Augmented assignment is deliberate: when `w` is a NumPy array it is\n",
"    # updated in place, so the caller's array changes as well.\n",
"    w -= l_r * grad_w\n",
"    b -= l_r * grad_b\n",
"\n",
"    return w, b"
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"id": "8fe4faa0-96c3-40f9-b025-6f81002ba483",
"metadata": {},
"outputs": [],
"source": [
"# Sample size\n",
"M = 200\n",
"\n",
"# No. of input features\n",
"n = 1\n",
"\n",
"# Learning rate (step size of each gradient-descent update)\n",
"l_r = 0.01\n",
"\n",
"# Number of update iterations over the training set\n",
"epochs = 300\n",
"\n",
"# One fixed seed for the data, the initial parameters and the split, so that\n",
"# Restart & Run All reproduces the same numbers\n",
"SEED = 42\n",
"np.random.seed(SEED)\n",
"\n",
"X, y = make_regression(n_samples=M, n_features=n, n_informative=n, n_targets=1, random_state=SEED, noise=10)\n",
"y = np.reshape(y, (y.size, 1))  # column vector, the same shape forward_prop returns\n",
"\n",
"# Random starting values for the bias and for one weight per feature\n",
"b = np.random.normal(scale=10)\n",
"w = np.random.normal(scale=10, size=(X.shape[1],))\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)\n",
"\n",
"losses = []\n",
"\n",
"for i in range(epochs):\n",
"    y_pred = forward_prop(X_train, w, b)\n",
"\n",
"    loss = compute_loss(y_train, y_pred)\n",
"    losses.append(loss)\n",
"\n",
"    # back_prop returns the updated parameters; rebind both so the loop does\n",
"    # not rely on `w` being modified in place.\n",
"    w, b = back_prop(X_train, y_train, y_pred, w, b, l_r)\n",
"\n",
"    if i % 10 == 0:\n",
"        print('Epoch: ', i)\n",
"        print('Loss = ', loss)\n",
"        # plot_pred_line draws w * x + b, which assumes a single feature (n = 1)\n",
"        plot_pred_line(X_train, y_train, w, b, losses)"
]
},
|
|
{
"cell_type": "code",
"execution_count": null,
"id": "b7966eca-eba4-420d-90e3-4ffdda5b44cb",
"metadata": {},
"outputs": [],
"source": [
"print('Prediction: ')\n",
"y_pred = forward_prop(X_test, w, b)\n",
"loss = compute_loss(y_test, y_pred)\n",
"print('Loss = ', loss)\n",
"\n",
"# r2_score takes (y_true, y_pred); the score is not symmetric, so the order matters\n",
"r2 = r2_score(y_test, y_pred)\n",
"# R2 is a fraction, so scale it by 100 before labelling it as a percentage\n",
"print('R2 = {}%'.format(round(r2 * 100, 2)))\n",
"\n",
"print('\\nw = ', w)\n",
"print('b = ', b)"
]
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.16"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|