"""Minimal fully-connected neural network built on NumPy.

Data is laid out column-wise throughout: every array of activations has
shape ``(units, examples)``.
"""

from dataclasses import dataclass
from typing import Literal

import numpy as np

# Predictions are clipped to [_EPSILON, 1 - _EPSILON] before taking logs or
# dividing, so a saturated output never produces inf/nan.
_EPSILON = 1e-12

# How often (in epochs) `train` prints progress when `verbose` is set.
_REPORT_EVERY = 100


@dataclass
class NeuralNetworkLayer:
    """Parameters of one dense layer."""

    # Weight matrix of shape (output_size, input_size).
    weights: np.ndarray
    # Bias column vector of shape (output_size, 1).
    bias: np.ndarray
    # Name of the activation applied to the layer's linear output.
    activation: Literal["relu", "sigmoid"]


@dataclass
class NeuralNetwork:
    """Ordered stack of layers, applied first to last."""

    layers: list[NeuralNetworkLayer]


def init_network(architecture, seed=1):
    """Build a network with small random parameters.

    Args:
        architecture: Sequence of dicts. Every entry needs a ``"size"`` key;
            every entry except the first (the input description) also needs
            an ``"activation"`` key.
        seed: Seed passed to ``np.random.seed``. Note that this reseeds
            NumPy's global random state.

    Returns:
        A ``NeuralNetwork`` with ``len(architecture) - 1`` layers.
    """
    np.random.seed(seed)
    layers = []
    # Consecutive entries describe the input and output side of one layer.
    for previous, current in zip(architecture, architecture[1:]):
        input_size = previous["size"]
        output_size = current["size"]
        # Weights are drawn before the bias so that results for a given seed
        # stay reproducible.
        weights = np.random.randn(output_size, input_size) * 0.01
        bias = np.random.randn(output_size, 1) * 0.1
        layers.append(NeuralNetworkLayer(weights, bias, current["activation"]))
    return NeuralNetwork(layers)


def relu(Z):
    """Return the element-wise rectified linear unit of ``Z``."""
    return np.maximum(0, Z)


def sigmoid(Z):
    """Return the element-wise logistic sigmoid of ``Z``."""
    return 1 / (1 + np.exp(-Z))


def _relu_backward(dA, Z):
    """Return dCost/dZ for a ReLU layer given dCost/dA."""
    return dA * (Z > 0)


def _sigmoid_backward(dA, Z):
    """Return dCost/dZ for a sigmoid layer given dCost/dA."""
    s = sigmoid(Z)
    return dA * s * (1 - s)


# Activation name -> forward function / backward (gradient) function.
_ACTIVATIONS = {"relu": relu, "sigmoid": sigmoid}
_ACTIVATION_GRADIENTS = {"relu": _relu_backward, "sigmoid": _sigmoid_backward}


def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    """Run one dense layer forward.

    Args:
        A_prev: Activations of the previous layer, shape (input_size, m).
        W_curr: Layer weights, shape (output_size, input_size).
        b_curr: Layer bias, shape (output_size, 1).
        activation: ``"relu"`` or ``"sigmoid"``.

    Returns:
        Tuple ``(A_curr, Z_curr)``: the activated output and the linear
        pre-activation output.

    Raises:
        ValueError: If ``activation`` is not a supported name.
    """
    Z_curr = np.dot(W_curr, A_prev) + b_curr
    try:
        activation_func = _ACTIVATIONS[activation]
    except (KeyError, TypeError):
        raise ValueError(
            f"Non-supported activation function: '{activation}'"
        ) from None
    return activation_func(Z_curr), Z_curr


def get_cost_value(Y_hat, Y):
    """Return the mean binary cross-entropy between ``Y_hat`` and ``Y``.

    Args:
        Y_hat: Predicted probabilities, shape (1, m).
        Y: Ground-truth labels (0 or 1), shape (1, m).

    Returns:
        The cost as a zero-dimensional NumPy array.
    """
    # Number of examples.
    m = Y_hat.shape[1]
    # A prediction of exactly 0 or 1 would give log(0) = -inf and a nan cost.
    Y_hat = np.clip(np.asarray(Y_hat, dtype=float), _EPSILON, 1 - _EPSILON)
    cost = -1 / m * (
        np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T)
    )
    return np.squeeze(cost)


def _get_accuracy_value(Y_hat, Y):
    """Return the fraction of examples classified correctly at threshold 0.5."""
    predictions = Y_hat > 0.5
    labels = np.reshape(Y, Y_hat.shape) > 0.5
    # An example counts as correct only when every output unit matches.
    return float((predictions == labels).all(axis=0).mean())


def full_forward_propagation(X, network: NeuralNetwork):
    """Run ``X`` through every layer of ``network``.

    Args:
        X: Input features, shape (input_size, m).
        network: The network to evaluate.

    Returns:
        Tuple ``(A_last, A_values, Z_values)``: the final activations, plus
        the per-layer activations and pre-activations in layer order. With no
        layers, ``A_last`` is ``X`` and both lists are empty.
    """
    A_values = []
    Z_values = []
    A_curr = X
    for layer in network.layers:
        A_curr, Z_curr = single_layer_forward_propagation(
            A_curr, layer.weights, layer.bias, layer.activation
        )
        A_values.append(A_curr)
        Z_values.append(Z_curr)
    return A_curr, A_values, Z_values


def _full_backward_propagation(X, Y, network, A_values, Z_values):
    """Return per-layer ``(dW, db)`` gradients of the cross-entropy cost."""
    if not network.layers:
        return []

    # Clipping keeps the divisions below finite for saturated outputs.
    Y_hat = np.clip(np.asarray(A_values[-1], dtype=float), _EPSILON, 1 - _EPSILON)
    Y = np.reshape(Y, Y_hat.shape)
    m = Y_hat.shape[1]

    # Derivative of the (un-averaged) cross-entropy w.r.t. the predictions;
    # the 1/m factor is applied when forming dW and db.
    dA = -(Y / Y_hat - (1 - Y) / (1 - Y_hat))

    gradients = []
    for index in reversed(range(len(network.layers))):
        layer = network.layers[index]
        A_prev = X if index == 0 else A_values[index - 1]
        dZ = _ACTIVATION_GRADIENTS[layer.activation](dA, Z_values[index])
        dW = np.dot(dZ, A_prev.T) / m
        db = np.sum(dZ, axis=1, keepdims=True) / m
        # Propagate to the previous layer using the not-yet-updated weights.
        dA = np.dot(layer.weights.T, dZ)
        gradients.append((dW, db))
    gradients.reverse()
    return gradients


def _update_network(network, gradients, learning_rate):
    """Apply one gradient-descent step to every layer of ``network``."""
    for layer, (dW, db) in zip(network.layers, gradients):
        # Rebind rather than mutate so arrays shared with callers stay intact.
        layer.weights = layer.weights - learning_rate * dW
        layer.bias = layer.bias - learning_rate * db


def train(X, Y, network: NeuralNetwork, epochs, learning_rate, verbose=False,
          callback=None):
    """Train ``network`` in place with batch gradient descent.

    Args:
        X: Input features, shape (input_size, m).
        Y: Binary labels, shape (1, m).
        network: Network to train; its layers' weights and biases are
            replaced on every epoch.
        epochs: Number of full passes over the data.
        learning_rate: Step size of the gradient-descent update.
        verbose: When true, print cost and accuracy every
            ``_REPORT_EVERY`` epochs.
        callback: Optional callable invoked as ``callback(epoch, network)``
            after each update.

    Returns:
        Tuple ``(cost_history, accuracy_history)`` with one float per epoch,
        measured before that epoch's update.
    """
    cost_history = []
    accuracy_history = []

    for epoch in range(epochs):
        Y_hat, A_values, Z_values = full_forward_propagation(X, network)

        cost = float(get_cost_value(Y_hat, Y))
        accuracy = _get_accuracy_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy_history.append(accuracy)

        gradients = _full_backward_propagation(X, Y, network, A_values, Z_values)
        _update_network(network, gradients, learning_rate)

        if verbose and epoch % _REPORT_EVERY == 0:
            print(f"Epoch {epoch:05d} - cost: {cost:.5f} - accuracy: {accuracy:.5f}")
        if callback is not None:
            callback(epoch, network)

    return cost_history, accuracy_history


def main(architecture):
    """Train a network of the given architecture on the logical-OR table.

    Returns:
        The ``(cost_history, accuracy_history)`` tuple produced by ``train``.
    """
    network = init_network(architecture)
    X_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    Y_train = np.array([0, 1, 1, 1])
    # Examples are stored column-wise: X is (2, 4) and Y is (1, 4).
    return train(X_train.T, Y_train.reshape(1, -1), network, 1000, 0.1)


if __name__ == "__main__":
    main(architecture=[
        {"size": 2},
        {"size": 25, "activation": "relu"},
        {"size": 50, "activation": "relu"},
        {"size": 50, "activation": "relu"},
        {"size": 25, "activation": "relu"},
        {"size": 1, "activation": "sigmoid"},
    ])