lab3

2024-05-13 01:32:51 +03:00 · 2024-05-13 01:32:51 +03:00 · 19eaa28e47
commit 19eaa28e47
parent c86d4ca749
4 changed files with 576 additions and 0 deletions
--- a/lab3.docx
+++ b/lab3.docx
--- a/lab3_1.py
+++ b/lab3_1.py
@ -0,0 +1,212 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from sklearn.linear_model import LinearRegression
+import numpy as np
+
+# Warning, this is not great code. Just something hacked together for the report.
+
+def create_dataset(sunspots, n = 2):
+    P = []
+    T = []
+    for i in range(len(sunspots) - n):
+        P.append(np.array(sunspots["activity"][i:(i+n)]))
+        T.append(sunspots["activity"][i+n])
+    return P, T
+
+# 1. & 2. Load the data: tab-separated, no header row, columns named year / activity.
+sunspots = pd.read_csv("sunspot.txt", delimiter='\t', header=None, names=["year", "activity"])
+
+# 4. Plot activity against year (disabled by `if False`; flip it to show the figure).
+if False:
+    plt.plot(sunspots['year'], sunspots["activity"])
+    plt.xlabel("Metai")
+    plt.ylabel("Aktyvumas")
+    plt.title(f"Saulės aktyvumas tarp {min(sunspots['year'])} ir {max(sunspots['year'])} metų")
+    plt.show()
+
+# 5. Build the input/target lists with the default window length (n = 2).
+P, T = create_dataset(sunspots)
+
+# 6.
+def draw_scatterplot(Xs, Ys, marker = 'o'):
+    ax = plt.subplot(projection='3d')
+    X_coords = []
+    Y_coords = []
+    Z_coords = []
+    for i in range(len(Xs)):
+        X_coords.append(Xs[i][0])
+        Y_coords.append(Xs[i][1])
+        Z_coords.append(Ys[i])
+
+    ax.scatter(X_coords, Y_coords, Z_coords, marker=marker)
+    ax.set_xlabel('Užpraitų metų aktyvumas')
+    ax.set_ylabel('Praitų metų aktyvumas')
+    ax.set_zlabel('Šių metų aktyvumas')
+    ax.set_title('Šių metų aktyvumo priklausomybė nuo praėjusių metų')
+
+    return ax
+
+# Show the whole dataset as a 3D scatter plot (disabled).
+if False:
+    draw_scatterplot(P, T)
+    plt.show()
+
+# 7. The first 200 samples form the training set; the remainder is used as the test set below.
+assert len(P) > 200
+Pu = P[:200]
+Tu = T[:200]
+
+# 8. Fit an ordinary least-squares model on the training set.
+model = LinearRegression().fit(Pu, Tu)
+
+# 9. Report the fitted parameters.
+print(f"intercept: {model.intercept_}")
+print(f"coefficients: {model.coef_}")
+
+# Draw the fitted model over the data (disabled).
+if False:
+    w1 = model.coef_[0]
+    w2 = model.coef_[1]
+    b = model.intercept_
+
+    # Largest value of each input feature over the whole dataset.
+    x1 = max(p[0] for p in P)
+    x2 = max(p[1] for p in P)
+    ax = draw_scatterplot(P, T)
+    # Red segment from the model output at inputs (0, 0) to the output at (x1, x2).
+    ax.plot([0, x1], [0, x2], 'r', zs=[b, x1*w1 + x2*w2 + b])
+    plt.show()
+
+# 10.
+if False:
+    draw_scatterplot(Pu, model.predict(Pu))
+    draw_scatterplot(Pu, Tu)
+
+    plt.legend(handles=[
+        mpatches.Patch(color='blue', label='Tikrasis'),
+        mpatches.Patch(color='orange', label='Numatytas')
+    ])
+    plt.show()
+
+# 11.
+if False:
+    e = np.array(Tu) - model.predict(Pu)
+    draw_scatterplot(Pu, e)
+    plt.show()
+
+# 12.
+if False:
+    e = np.array(Tu) - model.predict(Pu)
+    plt.hist(e, bins=40)
+    plt.xlabel("Klaida")
+    plt.ylabel("Dažnis")
+    plt.show()
+
+# 13.
+e = np.array(Tu) - model.predict(Pu)
+print("MSE (train)", np.sum(e**2) / len(e))
+print("MAD (train)", np.median(np.abs(e)))
+
+e = np.array(T[200:]) - model.predict(P[200:])
+print("MSE (test)", np.sum(e**2) / len(e))
+print("MAD (test)", np.median(np.abs(e)))
+
+# 14. & 15. & 16.
+class AdaptiveLinearNeuron(object):
+   def __init__(self, rate = 0.01, max_niter = 10, mse_goal = -1):
+      self.rate = rate
+      self.max_niter = max_niter
+      self.mse_goal = mse_goal
+
+   def fit(self, X, y):
+      """Fit training data
+      X : Training vectors, X.shape : [#samples, #features]
+      y : Target values, y.shape : [#samples]
+      """
+
+      # weights
+      self.weight = np.zeros(1 + X.shape[1])
+
+      # Number of misclassifications
+      self.errors = []
+
+      # Cost function
+      self.cost = []
+
+      for i in range(self.max_niter):
+         output = self.net_input(X)
+         errors = y - output
+         self.weight[1:] += self.rate * X.T.dot(errors)
+         self.weight[0] += self.rate * errors.sum()
+         cost = (errors**2).sum() / 2.0
+         self.cost.append(cost)
+
+         mse = np.average(errors**2)
+         if self.mse_goal != -1 and mse < self.mse_goal: break
+      return self
+
+   def net_input(self, X):
+      """Calculate net input"""
+      return np.dot(X, self.weight[1:]) + self.weight[0]
+
+   def activation(self, X):
+      """Compute linear activation"""
+      return self.net_input(X)
+
+   def predict(self, X):
+      """Return class label after unit step"""
+      return np.where(self.activation(X) >= 0.0, 1, -1)
+
+
+# print("---------------")
+# for lr in [5e-5, 1e-5, 5e-6, 1e-6, 5e-7, 1e-7, 5e-8, 1e-8, 5e-9, 1e-9]:
+#     aln = AdaptiveLinearNeuron(lr, 100000, 300).fit(np.array(Pu), np.array(Tu))
+
+#     print(lr, aln.cost[-1], len(aln.cost))
+
+print("---------------")
+aln = AdaptiveLinearNeuron(0.000001, 1000, 280).fit(np.array(Pu), np.array(Tu))
+if False:
+    plt.plot(aln.cost, marker='o')
+    plt.xlabel('Epochs')
+    plt.ylabel('Sum-squared-error')
+    plt.title('Adaptive Linear Neuron - Learning rate 0.000001')
+    plt.show()
+
+# 17.
+print("iterations", len(aln.cost))
+print("b", aln.weight[0])
+print("w1", aln.weight[1])
+print("w2", aln.weight[2])
+
+# 18.
+# * Yes, it converges, because the target MSE threshold is reached
+# * b ~ 0.17, w1 ~ -0.58, w2 ~ 1.46
+
+# 19.
+# 'lr' cannot be increased much: training then stops converging and floating-point errors occur
+# Decreasing 'lr' simply lengthens the training time linearly
+
+# 20.
+print("------ Didinimas ---------")
+for n in [2, 4, 6, 8, 10]:
+    print(f"====== n = {n}")
+    P, T = create_dataset(sunspots, n)
+    assert len(P) > 200
+    Pu = np.array(P[:200])
+    Tu = np.array(T[:200])
+
+    # aln = AdaptiveLinearNeuron(1e-7, 1000).fit(Pu, Tu)
+    # plt.plot(aln.cost, marker='o')
+    # plt.xlabel('Epochs')
+    # plt.ylabel('Sum-squared-error')
+    # plt.title('Adaptive Linear Neuron - Learning rate 0.000001')
+    # plt.show()
+    # print(f"cost (n={n}) ", aln.cost[-1])
+    # print(f"iterations (n={n}) ", len(aln.cost))
+
+    model = LinearRegression().fit(Pu, Tu)
+    e = Tu - model.predict(Pu)
+    print("MSE (train)", np.average(e**2))
+    print("MAD (train)", np.median(np.abs(e)))
+
+    e = np.array(T[200:]) - model.predict(P[200:])
+    print("MSE (test)", np.average(e**2))
+    print("MAD (test)", np.median(np.abs(e)))
--- a/lab3_2.py
+++ b/lab3_2.py
@ -0,0 +1,49 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import tensorflow as tf
+from sklearn.model_selection import cross_val_score
+
+def load_data():
+    apples = pd.read_csv("apple_quality_clean.csv")
+
+
+    return apples.drop(columns=["Quality"]), apples["Quality"]
+
+X, Y = load_data()
+# Convert values from a 0..1 range to a (-1)..1 range
+# Conclusion: makes no difference
+#X = X.map(lambda x: x*2 - 1)
+
+tf.keras.utils.set_random_seed(42)
+
+model = tf.keras.models.Sequential([
+  tf.keras.layers.Dense(1, activation='relu')
+])
+
+model.build(input_shape=[None,len(X.columns)])
+
+model.compile(
+  optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
+  loss='binary_crossentropy',
+  metrics=['accuracy'],
+)
+
+history = model.fit(
+  X,
+  Y,
+  batch_size=len(X),
+  epochs=200,
+  shuffle=True,
+  validation_split=0.2
+)
+
+# print("10-fold cross validation score:", cross_val_score(model, X, Y, cv=10))
+
+# print("Accuracy", history.history['acc'][-1])
+# print(model.score)
+
+plt.plot(history.history['accuracy'])
+plt.plot(history.history['val_accuracy'])
+plt.xlabel('Iterations')
+plt.ylabel('accuracy')
+plt.show()
--- a/sunspot.txt
+++ b/sunspot.txt
@ -0,0 +1,315 @@
+1700	5
+1701	11
+1702	16
+1703	23
+1704	36
+1705	58
+1706	29
+1707	20
+1708	10
+1709	8
+1710	3
+1711	0
+1712	0
+1713	2
+1714	11
+1715	27
+1716	47
+1717	63
+1718	60
+1719	39
+1720	28
+1721	26
+1722	22
+1723	11
+1724	21
+1725	40
+1726	78
+1727	122
+1728	103
+1729	73
+1730	47
+1731	35
+1732	11
+1733	5
+1734	16
+1735	34
+1736	70
+1737	81
+1738	111
+1739	101
+1740	73
+1741	40
+1742	20
+1743	16
+1744	5
+1745	11
+1746	22
+1747	40
+1748	60
+1749	81
+1750	83
+1751	48
+1752	48
+1753	31
+1754	12
+1755	10
+1756	10
+1757	32
+1758	48
+1759	54
+1760	63
+1761	86
+1762	61
+1763	45
+1764	36
+1765	21
+1766	11
+1767	38
+1768	70
+1769	106
+1770	101
+1771	82
+1772	67
+1773	35
+1774	31
+1775	7
+1776	20
+1777	93
+1778	154
+1779	126
+1780	85
+1781	68
+1782	39
+1783	23
+1784	10
+1785	24
+1786	83
+1787	132
+1788	131
+1789	118
+1790	90
+1791	67
+1792	60
+1793	47
+1794	41
+1795	21
+1796	16
+1797	6
+1798	4
+1799	7
+1800	15
+1801	34
+1802	45
+1803	43
+1804	48
+1805	42
+1806	28
+1807	10
+1808	8
+1809	3
+1810	0
+1811	1
+1812	5
+1813	12
+1814	14
+1815	35
+1816	46
+1817	41
+1818	30
+1819	24
+1820	16
+1821	7
+1822	4
+1823	2
+1824	9
+1825	17
+1826	36
+1827	50
+1828	64
+1829	67
+1830	71
+1831	48
+1832	28
+1833	9
+1834	13
+1835	57
+1836	122
+1837	138
+1838	103
+1839	86
+1840	65
+1841	37
+1842	24
+1843	11
+1844	15
+1845	40
+1846	62
+1847	99
+1848	125
+1849	96
+1850	67
+1851	65
+1852	54
+1853	39
+1854	21
+1855	7
+1856	4
+1857	23
+1858	55
+1859	94
+1860	96
+1861	77
+1862	59
+1863	44
+1864	47
+1865	31
+1866	16
+1867	7
+1868	38
+1869	74
+1870	139
+1871	111
+1872	102
+1873	66
+1874	45
+1875	17
+1876	11
+1877	12
+1878	3
+1879	6
+1880	32
+1881	54
+1882	60
+1883	64
+1884	64
+1885	52
+1886	25
+1887	13
+1888	7
+1889	6
+1890	7
+1891	36
+1892	73
+1893	85
+1894	78
+1895	64
+1896	42
+1897	26
+1898	27
+1899	12
+1900	10
+1901	3
+1902	5
+1903	24
+1904	42
+1905	64
+1906	54
+1907	62
+1908	49
+1909	44
+1910	19
+1911	6
+1912	4
+1913	1
+1914	10
+1915	47
+1916	57
+1917	104
+1918	81
+1919	64
+1920	38
+1921	26
+1922	14
+1923	6
+1924	17
+1925	44
+1926	64
+1927	69
+1928	78
+1929	65
+1930	36
+1931	21
+1932	11
+1933	6
+1934	9
+1935	36
+1936	80
+1937	114
+1938	110
+1939	89
+1940	68
+1941	48
+1942	31
+1943	16
+1944	10
+1945	33
+1946	93
+1947	152
+1948	136
+1949	135
+1950	84
+1951	69
+1952	32
+1953	14
+1954	4
+1955	38
+1956	142
+1957	190
+1958	185
+1959	159
+1960	112
+1961	54
+1962	38
+1963	28
+1964	10
+1965	15
+1966	47
+1967	94
+1968	106
+1969	106
+1970	105
+1971	67
+1972	69
+1973	38
+1974	35
+1975	16
+1976	13
+1977	28
+1978	93
+1979	155
+1980	155
+1981	141
+1982	116
+1983	67
+1984	46
+1985	18
+1986	13
+1987	29
+1988	100
+1989	158
+1990	143
+1991	146
+1992	94
+1993	55
+1994	30
+1995	18
+1996	9
+1997	22
+1998	64
+1999	93
+2000	119
+2001	111
+2002	104
+2003	64
+2004	40
+2005	30
+2006	15
+2007	7
+2008	3
+2009	4
+2010	16
+2011	57
+2012	58
+2013	65
+2014	79