# Patch metadata preserved verbatim from the original diff:
# diff --git a/Ataskaita lab3.docx b/Ataskaita lab3.docx
# new file mode 100644
# index 0000000..126beef
# Binary files /dev/null and b/Ataskaita lab3.docx differ
# diff --git a/lab3_1.py b/lab3_1.py
# new file mode 100644
# index 0000000..8785f07
# --- /dev/null
# +++ b/lab3_1.py
# @@ -0,0 +1,212 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from sklearn.linear_model import LinearRegression
import numpy as np

# Warning, this is not great code. Just something hacked together for the report.


def create_dataset(sunspots, n=2):
    """Build sliding-window samples from the ``activity`` column.

    Each input is ``n`` consecutive activity values and its target is the
    value that immediately follows that window.

    Args:
        sunspots: DataFrame with an ``activity`` column.
        n: Window length (number of previous values used as inputs).

    Returns:
        ``(P, T)`` where ``P`` is a list of 1-D arrays of length ``n`` and
        ``T`` is the list of matching target values. Both are empty when the
        frame has ``n`` rows or fewer.
    """
    # Work on the raw values so every lookup is positional. The previous
    # version sliced positionally but fetched the target by index *label*,
    # which only agrees with the position for a default 0..N-1 index.
    activity = sunspots["activity"].to_numpy()
    n_samples = len(activity) - n

    P = [activity[i:i + n].copy() for i in range(n_samples)]
    T = [activity[i + n] for i in range(n_samples)]
    return P, T


# 1. & 2.
sunspots = pd.read_csv("sunspot.txt", delimiter='\t', header=None, names=["year", "activity"])

# 4.
# Flip to True to plot the raw activity series over the years.
if False:
    plt.plot(sunspots['year'], sunspots["activity"])
    plt.xlabel("Metai")
    plt.ylabel("Aktyvumas")
    plt.title(f"Saulės aktyvumas tarp {min(sunspots['year'])} ir {max(sunspots['year'])} metų")
    plt.show()

# 5.
P, T = create_dataset(sunspots)


# 6.
def draw_scatterplot(Xs, Ys, marker='o'):
    """Scatter two-feature samples against their targets on a 3-D axes.

    Args:
        Xs: Sequence of samples; only elements ``[0]`` and ``[1]`` of each
            sample are plotted (x and y axes).
        Ys: Sequence of values plotted on the z axis, indexed like ``Xs``.
        marker: Matplotlib marker style passed to ``scatter``.

    Returns:
        The 3-D axes the points were drawn on.
    """
    ax = plt.subplot(projection='3d')

    x_coords = [sample[0] for sample in Xs]
    y_coords = [sample[1] for sample in Xs]
    # Indexed by position in Xs so a too-short Ys still fails loudly.
    z_coords = [Ys[i] for i in range(len(Xs))]

    ax.scatter(x_coords, y_coords, z_coords, marker=marker)
    ax.set_xlabel('Užpraitų metų aktyvumas')
    ax.set_ylabel('Praitų metų aktyvumas')
    ax.set_zlabel('Šių metų aktyvumas')
    ax.set_title('Šių metų aktyvumo priklausomybė nuo praėjusių metų')

    return ax


# Flip to True to show the full data set as a 3-D scatter plot.
if False:
    draw_scatterplot(P, T)
    plt.show()

# 7.
# The first 200 samples are the training set; the remainder is used for testing.
assert len(P) > 200
Pu = P[:200]
Tu = T[:200]

# 8.
model = LinearRegression().fit(Pu, Tu)

# 9.
# Section 9: report the fitted linear-regression parameters.
print(f"intercept: {model.intercept_}")
print(f"coefficients: {model.coef_}")

# Flip to True to overlay a red segment of the fitted plane on the data.
if False:
    w1 = model.coef_[0]
    w2 = model.coef_[1]
    b = model.intercept_

    x1 = max(p[0] for p in P)
    x2 = max(p[1] for p in P)
    ax = draw_scatterplot(P, T)
    # Segment from (0, 0, b) to (x1, x2, w1*x1 + w2*x2 + b); both ends lie on
    # the fitted plane.
    ax.plot([0, x1], [0, x2], 'r', zs=[b, x1*w1 + x2*w2 + b])
    plt.show()

# 10.
# Flip to True to compare actual and predicted training targets.
if False:
    # Draw order matters: the scatter colours come from matplotlib's default
    # colour cycle (first series blue, second orange) and the legend below is
    # built by hand. The actual values must therefore be drawn first so that
    # blue really is 'Tikrasis' (actual) and orange 'Numatytas' (predicted).
    # Previously the two calls were in the opposite order, which swapped the
    # legend. Both calls draw into the same 3-D axes.
    draw_scatterplot(Pu, Tu)
    draw_scatterplot(Pu, model.predict(Pu))

    plt.legend(handles=[
        mpatches.Patch(color='blue', label='Tikrasis'),
        mpatches.Patch(color='orange', label='Numatytas')
    ])
    plt.show()

# 11.
# Flip to True to plot the training residuals against the inputs.
if False:
    e = np.array(Tu) - model.predict(Pu)
    draw_scatterplot(Pu, e)
    plt.show()

# 12.
# Flip to True to show a histogram of the training residuals.
if False:
    e = np.array(Tu) - model.predict(Pu)
    plt.hist(e, bins=40)
    plt.xlabel("Klaida")
    plt.ylabel("Dažnis")
    plt.show()

# 13.
# Error metrics: mean squared error and median absolute error (printed as
# "MAD"), first on the 200 training samples, then on the remaining samples.
e = np.array(Tu) - model.predict(Pu)
print("MSE (train)", np.sum(e**2) / len(e))
print("MAD (train)", np.median(np.abs(e)))

e = np.array(T[200:]) - model.predict(P[200:])
print("MSE (test)", np.sum(e**2) / len(e))
print("MAD (test)", np.median(np.abs(e)))

# 14. & 15. & 16.
class AdaptiveLinearNeuron:
    """Single linear unit trained with full-batch gradient descent.

    The unit computes ``X @ w + b`` and, on every iteration, moves the
    weights along the gradient of the summed squared error over the whole
    training set.

    Args:
        rate: Learning rate applied to every weight update.
        max_niter: Maximum number of passes over the training data.
        mse_goal: Training stops early once the mean squared error drops
            below this value. ``-1`` (the default) disables early stopping.
    """

    def __init__(self, rate=0.01, max_niter=10, mse_goal=-1):
        self.rate = rate
        self.max_niter = max_niter
        self.mse_goal = mse_goal

    def fit(self, X, y):
        """Fit the unit to the training data.

        Args:
            X: Training vectors, array-like of shape [#samples, #features].
            y: Target values, array-like of shape [#samples].

        Returns:
            ``self``, so calls can be chained.
        """
        # Accept plain lists (e.g. a list of windows) as well as arrays; the
        # previous version required an ndarray because it read ``X.shape``.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        # weight[0] is the bias, weight[1:] are the per-feature weights.
        self.weight = np.zeros(1 + X.shape[1])

        # Kept for backward compatibility; never filled by this class.
        self.errors = []

        # Half the summed squared error recorded once per iteration.
        self.cost = []

        for _ in range(self.max_niter):
            errors = y - self.net_input(X)
            self.weight[1:] += self.rate * X.T.dot(errors)
            self.weight[0] += self.rate * errors.sum()

            # Cost and MSE are both computed from the errors measured
            # *before* this iteration's weight update.
            self.cost.append((errors**2).sum() / 2.0)

            if self.mse_goal != -1 and np.mean(errors**2) < self.mse_goal:
                break
        return self

    def net_input(self, X):
        """Return the linear output ``X @ weight[1:] + weight[0]``."""
        return np.dot(X, self.weight[1:]) + self.weight[0]

    def activation(self, X):
        """Return the linear (identity) activation, i.e. ``net_input(X)``."""
        return self.net_input(X)

    def predict(self, X):
        """Return 1 where the linear output is >= 0 and -1 elsewhere.

        This is a thresholded class label; use ``activation`` or
        ``net_input`` to obtain the continuous output.
        """
        return np.where(self.activation(X) >= 0.0, 1, -1)


# Learning-rate sweep, kept for reference (disabled):
# print("---------------")
# for lr in [5e-5, 1e-5, 5e-6, 1e-6, 5e-7, 1e-7, 5e-8, 1e-8, 5e-9, 1e-9]:
#     aln = AdaptiveLinearNeuron(lr, 100000, 300).fit(np.array(Pu), np.array(Tu))
#
#     print(lr, aln.cost[-1], len(aln.cost))

print("---------------")
aln = AdaptiveLinearNeuron(0.000001, 1000, 280).fit(np.array(Pu), np.array(Tu))
# Flip to True to plot the recorded cost per iteration.
if False:
    plt.plot(aln.cost, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Sum-squared-error')
    plt.title('Adaptive Linear Neuron - Learning rate 0.000001')
    plt.show()

# 17.
print("iterations", len(aln.cost))
print("b", aln.weight[0])
print("w1", aln.weight[1])
print("w2", aln.weight[2])

# 18.
# * Yes, it converges, because the target MSE threshold is reached
# * b ~ 0.17, w1 ~ -0.58, w2 ~ 1.46

# 19.
# Increasing 'lr' is not really an option: training then stops converging and
# floating-point errors occur.
# Decreasing 'lr' simply lengthens the training time linearly.

# 20.
# Repeat the linear-regression experiment for several window lengths n.
print("------ Didinimas ---------")
for n in [2, 4, 6, 8, 10]:
    print(f"====== n = {n}")
    P, T = create_dataset(sunspots, n)
    # The first 200 samples are the training set; the rest is the test set.
    assert len(P) > 200
    Pu = np.array(P[:200])
    Tu = np.array(T[:200])

    # Adaptive-neuron variant of the same experiment, kept for reference
    # (disabled):
    # aln = AdaptiveLinearNeuron(1e-7, 1000).fit(Pu, Tu)
    # plt.plot(aln.cost, marker='o')
    # plt.xlabel('Epochs')
    # plt.ylabel('Sum-squared-error')
    # plt.title('Adaptive Linear Neuron - Learning rate 0.000001')
    # plt.show()
    # print(f"cost (n={n}) ", aln.cost[-1])
    # print(f"iterations (n={n}) ", len(aln.cost))

    model = LinearRegression().fit(Pu, Tu)
    e = Tu - model.predict(Pu)
    print("MSE (train)", np.average(e**2))
    print("MAD (train)", np.median(np.abs(e)))

    e = np.array(T[200:]) - model.predict(P[200:])
    print("MSE (test)", np.average(e**2))
    print("MAD (test)", np.median(np.abs(e)))

# Patch metadata preserved verbatim from the original diff:
# \ No newline at end of file
# diff --git a/lab3_2.py b/lab3_2.py
# new file mode 100644
# index 0000000..590f270
# --- /dev/null
# +++ b/lab3_2.py
# @@ -0,0 +1,49 @@
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import cross_val_score


def load_data():
    """Load the apple-quality data set.

    Returns:
        ``(features, labels)``: every column except ``Quality`` as a
        DataFrame, and the ``Quality`` column as a Series.
    """
    apples = pd.read_csv("apple_quality_clean.csv")
    return apples.drop(columns=["Quality"]), apples["Quality"]


X, Y = load_data()
# Convert values from a 0..1 range to a (-1)..1 range
# Conclusion: makes no difference
#X = X.map(lambda x: x*2 - 1)

tf.keras.utils.set_random_seed(42)

# A single dense unit acting as a binary classifier. The output activation is
# a sigmoid because binary cross-entropy expects a probability in [0, 1]; the
# previous ReLU output is unbounded above and has zero gradient for negative
# pre-activations.
# NOTE(review): assumes "Quality" is encoded as 0/1 - confirm against the CSV.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.build(input_shape=[None, len(X.columns)])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# batch_size=len(X) is at least as large as the training part, so every epoch
# is a single full-batch gradient step.
history = model.fit(
    X,
    Y,
    batch_size=len(X),
    epochs=200,
    shuffle=True,
    validation_split=0.2
)

# print("10-fold cross
validation score:", cross_val_score(model, X, Y, cv=10)) + +# print("Accuracy", history.history['acc'][-1]) +# print(model.score) + +plt.plot(history.history['accuracy']) +plt.plot(history.history['val_accuracy']) +plt.xlabel('Iterations') +plt.ylabel('accuracy') +plt.show() diff --git a/sunspot.txt b/sunspot.txt new file mode 100644 index 0000000..f16e2be --- /dev/null +++ b/sunspot.txt @@ -0,0 +1,315 @@ +1700 5 +1701 11 +1702 16 +1703 23 +1704 36 +1705 58 +1706 29 +1707 20 +1708 10 +1709 8 +1710 3 +1711 0 +1712 0 +1713 2 +1714 11 +1715 27 +1716 47 +1717 63 +1718 60 +1719 39 +1720 28 +1721 26 +1722 22 +1723 11 +1724 21 +1725 40 +1726 78 +1727 122 +1728 103 +1729 73 +1730 47 +1731 35 +1732 11 +1733 5 +1734 16 +1735 34 +1736 70 +1737 81 +1738 111 +1739 101 +1740 73 +1741 40 +1742 20 +1743 16 +1744 5 +1745 11 +1746 22 +1747 40 +1748 60 +1749 81 +1750 83 +1751 48 +1752 48 +1753 31 +1754 12 +1755 10 +1756 10 +1757 32 +1758 48 +1759 54 +1760 63 +1761 86 +1762 61 +1763 45 +1764 36 +1765 21 +1766 11 +1767 38 +1768 70 +1769 106 +1770 101 +1771 82 +1772 67 +1773 35 +1774 31 +1775 7 +1776 20 +1777 93 +1778 154 +1779 126 +1780 85 +1781 68 +1782 39 +1783 23 +1784 10 +1785 24 +1786 83 +1787 132 +1788 131 +1789 118 +1790 90 +1791 67 +1792 60 +1793 47 +1794 41 +1795 21 +1796 16 +1797 6 +1798 4 +1799 7 +1800 15 +1801 34 +1802 45 +1803 43 +1804 48 +1805 42 +1806 28 +1807 10 +1808 8 +1809 3 +1810 0 +1811 1 +1812 5 +1813 12 +1814 14 +1815 35 +1816 46 +1817 41 +1818 30 +1819 24 +1820 16 +1821 7 +1822 4 +1823 2 +1824 9 +1825 17 +1826 36 +1827 50 +1828 64 +1829 67 +1830 71 +1831 48 +1832 28 +1833 9 +1834 13 +1835 57 +1836 122 +1837 138 +1838 103 +1839 86 +1840 65 +1841 37 +1842 24 +1843 11 +1844 15 +1845 40 +1846 62 +1847 99 +1848 125 +1849 96 +1850 67 +1851 65 +1852 54 +1853 39 +1854 21 +1855 7 +1856 4 +1857 23 +1858 55 +1859 94 +1860 96 +1861 77 +1862 59 +1863 44 +1864 47 +1865 31 +1866 16 +1867 7 +1868 38 +1869 74 +1870 139 +1871 111 +1872 102 +1873 66 +1874 45 +1875 17 +1876 11 
+1877 12 +1878 3 +1879 6 +1880 32 +1881 54 +1882 60 +1883 64 +1884 64 +1885 52 +1886 25 +1887 13 +1888 7 +1889 6 +1890 7 +1891 36 +1892 73 +1893 85 +1894 78 +1895 64 +1896 42 +1897 26 +1898 27 +1899 12 +1900 10 +1901 3 +1902 5 +1903 24 +1904 42 +1905 64 +1906 54 +1907 62 +1908 49 +1909 44 +1910 19 +1911 6 +1912 4 +1913 1 +1914 10 +1915 47 +1916 57 +1917 104 +1918 81 +1919 64 +1920 38 +1921 26 +1922 14 +1923 6 +1924 17 +1925 44 +1926 64 +1927 69 +1928 78 +1929 65 +1930 36 +1931 21 +1932 11 +1933 6 +1934 9 +1935 36 +1936 80 +1937 114 +1938 110 +1939 89 +1940 68 +1941 48 +1942 31 +1943 16 +1944 10 +1945 33 +1946 93 +1947 152 +1948 136 +1949 135 +1950 84 +1951 69 +1952 32 +1953 14 +1954 4 +1955 38 +1956 142 +1957 190 +1958 185 +1959 159 +1960 112 +1961 54 +1962 38 +1963 28 +1964 10 +1965 15 +1966 47 +1967 94 +1968 106 +1969 106 +1970 105 +1971 67 +1972 69 +1973 38 +1974 35 +1975 16 +1976 13 +1977 28 +1978 93 +1979 155 +1980 155 +1981 141 +1982 116 +1983 67 +1984 46 +1985 18 +1986 13 +1987 29 +1988 100 +1989 158 +1990 143 +1991 146 +1992 94 +1993 55 +1994 30 +1995 18 +1996 9 +1997 22 +1998 64 +1999 93 +2000 119 +2001 111 +2002 104 +2003 64 +2004 40 +2005 30 +2006 15 +2007 7 +2008 3 +2009 4 +2010 16 +2011 57 +2012 58 +2013 65 +2014 79 \ No newline at end of file