{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c123b4de-9116-4150-8de9-8b171666d8b9",
   "metadata": {},
   "source": [
    "# Vienasluoksniai tinklai - tiesinės regresijos uždavinys\n",
    "\n",
    "Tiesinės lygties pritaikymas duotam duomenų rinkiniui n-matėje erdvėje vadinamas tiesine regresija. Toliau pateiktame paveikslėlyje parodytas tiesinės regresijos pavyzdys. Paprastai tariant, bandoma rasti geriausias $w$ ir $b$ parametrų reikšmes, kurios geriausiai atitiktų duomenų rinkinį. Tuomet, gavę geriausią įmanomą įvertį, galime prognozuoti $y$ reikšmes, turėdami $x$.\n",
    "\n",
    "![Tiesinė regresija](linear-regression.gif)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "4171cf08-31ca-4608-b187-f2ac40a6009b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.pylab import rcParams\n",
    "from sklearn.datasets import make_regression\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import r2_score \n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "73eef68e-fb6c-4129-b87c-c3e268a7de76",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_graph(X, y, pred_line=None, losses=None):\n",
    "    \"\"\"Scatter-plot the datapoints, optionally with a line and a loss curve.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X, y : array-like\n",
    "        Coordinates of the datapoints, passed to ``Axes.scatter``.\n",
    "    pred_line : dict, optional\n",
    "        Mapping with keys ``'x_line'`` and ``'y_line'`` holding the\n",
    "        coordinates of a line drawn in red over the scatter plot.\n",
    "    losses : sequence, optional\n",
    "        One loss value per epoch. When given, a second panel plots the\n",
    "        values against their index.\n",
    "    \"\"\"\n",
    "    # Identity checks on purpose: `!= None` is evaluated element-wise on\n",
    "    # NumPy arrays, and the result is ambiguous inside `if` as soon as the\n",
    "    # array holds more than one element.\n",
    "    has_line = pred_line is not None\n",
    "    has_losses = losses is not None\n",
    "\n",
    "    plots = 2 if has_losses else 1\n",
    "    fig = plt.figure(figsize=(8 * plots, 6))\n",
    "\n",
    "    ax1 = fig.add_subplot(1, plots, 1)\n",
    "    ax1.scatter(X, y, alpha=0.8)  # the original datapoints\n",
    "\n",
    "    if has_line:\n",
    "        x_line, y_line = pred_line['x_line'], pred_line['y_line']\n",
    "        ax1.plot(x_line, y_line, linewidth=2, markersize=12, color='red', alpha=0.8)\n",
    "        ax1.set_title('Predicted Line on set of Datapoints')\n",
    "    else:\n",
    "        ax1.set_title('Plot of Datapoints generated')\n",
    "\n",
    "    ax1.set_xlabel('x')\n",
    "    ax1.set_ylabel('y')\n",
    "\n",
    "    if has_losses:\n",
    "        ax2 = fig.add_subplot(1, plots, 2)\n",
    "        ax2.plot(np.arange(len(losses)), losses, marker='o')\n",
    "        ax2.set_xlabel('Epoch')\n",
    "        ax2.set_ylabel('Loss')\n",
    "        ax2.set_title('Loss')\n",
    "\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "64294a9a-2018-4a37-8dbf-2b7a242fa72c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_pred_line(X, y, w, b, losses=None):\n",
    "    \"\"\"Draw the line ``w * x + b`` over the datapoints via ``plot_graph``.\n",
    "\n",
    "    The line is sampled at 10 evenly spaced x values between ``np.min(X)``\n",
    "    and ``np.max(X)`` so that it spans the scatter plot. ``losses`` is\n",
    "    forwarded to ``plot_graph`` unchanged. Returns ``None``.\n",
    "    \"\"\"\n",
    "    xs = np.linspace(np.min(X), np.max(X), 10)\n",
    "    line = {'x_line': xs, 'y_line': w * xs + b}\n",
    "    plot_graph(X, y, pred_line=line, losses=losses)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ac81d49e-6e39-4dd1-97cc-49d850877744",
   "metadata": {},
   "outputs": [],
   "source": [
    "def forward_prop(X, w, b):\n",
    "    \"\"\"Return the linear model's predictions for the rows of ``X``.\n",
    "\n",
    "    ``w * X`` is summed along axis 1, reshaped to ``(X.shape[0], 1)``,\n",
    "    and ``b`` is then added to the result.\n",
    "    \"\"\"\n",
    "    row_totals = np.sum(w * X, axis=1)\n",
    "    column = np.reshape(row_totals, (X.shape[0], 1))\n",
    "    return column + b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3f35616b-0a5c-41be-811b-6f15d436a10c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_loss(y, y_pred):\n",
    "    \"\"\"Mean squared error between the predictions ``y_pred`` and the targets ``y``.\"\"\"\n",
    "    residuals = y_pred - y\n",
    "    return np.mean(np.square(residuals))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "06a40434-da8f-4176-96d5-ab162bc843e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def grad_desc(w, b, X_train, y_train, y_pred):\n",
    "    \"\"\"Gradients of the mean-squared-error loss with respect to ``w`` and ``b``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    w, b\n",
    "        Current parameters. Not used in the computation; kept in the\n",
    "        signature so existing callers keep working.\n",
    "    X_train : ndarray\n",
    "        Inputs, assumed 2-D with one row per sample and one column per feature.\n",
    "    y_train, y_pred : ndarray\n",
    "        Targets and predictions, assumed to be column vectors with one row\n",
    "        per sample.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dw : ndarray\n",
    "        One gradient entry per column of ``X_train``.\n",
    "    db : scalar\n",
    "        Gradient with respect to the bias.\n",
    "    \"\"\"\n",
    "    residual = y_pred - y_train\n",
    "\n",
    "    # Average over the samples only (axis 0). A mean over every element\n",
    "    # would merge the per-feature gradients into one scalar, which is wrong\n",
    "    # as soon as X_train has more than one column.\n",
    "    dw = np.mean(2 * residual * X_train, axis=0)\n",
    "    db = np.mean(2 * residual)\n",
    "\n",
    "    return dw, db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "31386096-b418-4041-83f6-d689a87da77d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def back_prop(X_train, y_train, y_pred, w, b, l_r):\n",
    "    \"\"\"Apply one gradient-descent step and return the updated ``(w, b)``.\n",
    "\n",
    "    The gradients come from ``grad_desc``; each parameter is moved against\n",
    "    its gradient, scaled by the learning rate ``l_r``.\n",
    "\n",
    "    The update uses augmented assignment, so a NumPy array passed as ``w``\n",
    "    (or ``b``) is modified in place as well as returned.\n",
    "    \"\"\"\n",
    "    grad_w, grad_b = grad_desc(w, b, X_train, y_train, y_pred)\n",
    "\n",
    "    # Keep `-=`: it updates array arguments in place, which callers may rely on.\n",
    "    w -= grad_w * l_r\n",
    "    b -= grad_b * l_r\n",
    "\n",
    "    return w, b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fe4faa0-96c3-40f9-b025-6f81002ba483",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample size\n",
    "M = 200\n",
    "\n",
    "# No. of input features\n",
    "n = 1\n",
    "\n",
    "# Learning Rate - Define during explanation\n",
    "l_r = 0.01\n",
    "\n",
    "# Number of iterations for updates - Define during explanation\n",
    "epochs = 300\n",
    "\n",
    "# One seed for data generation, parameter initialisation and the train/test\n",
    "# split, so that Restart & Run All reproduces the same run.\n",
    "seed = 42\n",
    "np.random.seed(seed)\n",
    "\n",
    "X, y = make_regression(n_samples=M, n_features=n, n_informative=n, n_targets=1, random_state=seed, noise=10)\n",
    "y = np.reshape(y, (y.size, 1))  # column vector, the same layout forward_prop returns\n",
    "\n",
    "# Random starting point for the parameters: one weight per input feature.\n",
    "b = np.random.normal(scale=10)\n",
    "w = np.random.normal(scale=10, size=(X.shape[1],))\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)\n",
    "\n",
    "losses = []\n",
    "\n",
    "for i in range(epochs):\n",
    "    y_pred = forward_prop(X_train, w, b)\n",
    "\n",
    "    loss = compute_loss(y_train, y_pred)\n",
    "    losses.append(loss)\n",
    "\n",
    "    # Rebind `w` to the returned value instead of relying on back_prop's\n",
    "    # in-place update of the array.\n",
    "    w, b = back_prop(X_train, y_train, y_pred, w, b, l_r)\n",
    "\n",
    "    # Report progress and redraw the fitted line every 10th epoch.\n",
    "    if i % 10 == 0:\n",
    "        print('Epoch: ', i)\n",
    "        print('Loss = ', loss)\n",
    "        plot_pred_line(X_train, y_train, w, b, losses)\n",
    "\n",
    "del losses[:]  # empty the loss history once training is done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7966eca-eba4-420d-90e3-4ffdda5b44cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate the trained parameters on the held-out test split.\n",
    "print('Prediction: ')\n",
    "y_pred = forward_prop(X_test, w, b)\n",
    "loss = compute_loss(y_test, y_pred)\n",
    "print('Loss = ', loss)\n",
    "\n",
    "# r2_score takes (y_true, y_pred); the score is not symmetric in its arguments.\n",
    "r2 = r2_score(y_test, y_pred)\n",
    "# R2 is a fraction (1.0 is a perfect fit), so it is printed without a percent sign.\n",
    "print('R2 = {}'.format(round(r2, 4)))\n",
    "\n",
    "print('\\nw = ', w)\n",
    "print('b = ', b)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}