102 lines
2.7 KiB
Python
102 lines
2.7 KiB
Python
from dataclasses import dataclass
|
|
import numpy as np
|
|
from typing import Literal
|
|
|
|
@dataclass
class NeuralNetworkLayer:
    """Parameters and activation choice of a single dense layer.

    ``init_network`` creates instances whose ``weights`` have shape
    ``(output_size, input_size)`` and whose ``bias`` has shape
    ``(output_size, 1)``.
    """

    # Annotated as np.ndarray: np.array is a factory function, not a type.
    # Weight matrix multiplied with the previous layer's activations.
    weights: np.ndarray
    # Bias added to the weighted input.
    bias: np.ndarray
    # Name of the activation applied to the layer's weighted input.
    activation: Literal["relu", "sigmoid"]
|
|
|
|
@dataclass
class NeuralNetwork:
    """Feed-forward network stored as an ordered list of layers."""

    # Layers in the order in which forward propagation applies them.
    layers: list[NeuralNetworkLayer]
|
|
|
|
def init_network(architecture, seed=1):
    """Build a ``NeuralNetwork`` with randomly initialised parameters.

    Args:
        architecture: Sequence of layer descriptions (dicts). Every entry
            needs a ``"size"`` key; every entry after the first also needs an
            ``"activation"`` key. The first entry only supplies the input
            width of the first layer.
        seed: Value handed to ``np.random.seed`` before any parameter is drawn.

    Returns:
        A ``NeuralNetwork`` with one ``NeuralNetworkLayer`` per consecutive
        pair of entries in ``architecture``.
    """
    # NOTE: this reseeds NumPy's *global* generator, so it also affects any
    # other code that draws from np.random afterwards.
    np.random.seed(seed)

    built_layers = []
    for source_spec, target_spec in zip(architecture, architecture[1:]):
        fan_in = source_spec["size"]
        fan_out = target_spec["size"]

        # Weights are drawn before the bias; keeping this order means a given
        # seed keeps producing the same parameters.
        weight_matrix = np.random.randn(fan_out, fan_in) * 0.01
        bias_column = np.random.randn(fan_out, 1) * 0.1

        built_layers.append(
            NeuralNetworkLayer(
                weights=weight_matrix,
                bias=bias_column,
                activation=target_spec["activation"],
            )
        )

    return NeuralNetwork(layers=built_layers)
|
|
|
|
def relu(Z):
    """Apply the rectified linear unit element-wise.

    Args:
        Z: Array-like of values to rectify.

    Returns:
        The element-wise maximum of ``0`` and ``Z``.
    """
    rectified = np.maximum(0, Z)
    return rectified
|
|
|
|
def sigmoid(Z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-Z))`` element-wise.

    The naive formula overflows in ``exp`` for large negative inputs and emits
    a RuntimeWarning. This version only ever exponentiates a non-positive
    number, so it cannot overflow.

    Args:
        Z: Scalar or array-like of real values.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``Z``.
    """
    z = np.asarray(Z)
    # exp(-|z|) lies in (0, 1] for every finite z, so no overflow is possible.
    decay = np.exp(-np.abs(z))
    # For z >= 0: 1 / (1 + e^-z).  For z < 0: e^z / (1 + e^z).
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # real arrays untouched.
    return result[()]
|
|
|
|
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    """Compute one layer's weighted input and its activation.

    Args:
        A_prev: Activations coming from the previous layer (or the input).
        W_curr: Weight matrix of the current layer.
        b_curr: Bias of the current layer, added to ``W_curr @ A_prev``.
        activation: ``"relu"`` or ``"sigmoid"``.

    Returns:
        A tuple ``(A_curr, Z_curr)``: the activated output and the
        pre-activation value ``np.dot(W_curr, A_prev) + b_curr``.

    Raises:
        ValueError: If ``activation`` is not a supported name. ``ValueError``
            is a subclass of ``Exception``, so handlers written for the
            previous bare ``Exception`` keep working.
    """
    # Reject unknown names up front so no matrix product is wasted on them.
    if activation not in ("relu", "sigmoid"):
        raise ValueError(f"Non-supported activation function: '{activation}'")

    Z_curr = np.dot(W_curr, A_prev) + b_curr
    activation_func = relu if activation == "relu" else sigmoid
    return activation_func(Z_curr), Z_curr
|
|
|
|
def get_cost_value(Y_hat, Y):
    """Return the mean binary cross-entropy between predictions and labels.

    Predictions are clipped away from exactly 0 and 1 by the machine epsilon
    of their dtype before the logarithm is taken. Without the clip,
    ``0 * log(0)`` evaluates to ``nan`` and a saturated output poisons the
    whole cost. Predictions strictly inside ``[eps, 1 - eps]`` are unaffected.

    Args:
        Y_hat: Predicted probabilities; the second axis indexes the examples.
        Y: Target labels, laid out like ``Y_hat``.

    Returns:
        The cost with all length-one axes squeezed out.
    """
    Y_hat = np.asarray(Y_hat)
    if not np.issubdtype(Y_hat.dtype, np.floating):
        # Integer/bool predictions have no machine epsilon; promote them.
        Y_hat = Y_hat.astype(float)

    # number of examples
    m = Y_hat.shape[1]

    # Keep log() finite: 1 - eps is exactly representable in the same dtype.
    eps = np.finfo(Y_hat.dtype).eps
    Y_hat = np.clip(Y_hat, eps, 1 - eps)

    # calculation of the cost according to the cross-entropy formula
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)
|
|
|
|
def full_forward_propagation(X, network: NeuralNetwork):
    """Feed ``X`` through every layer of ``network`` in order.

    Args:
        X: Input handed to the first layer.
        network: Network whose ``layers`` are applied one after another.

    Returns:
        A tuple ``(A_last, A_values, Z_values)``. ``A_last`` is the output of
        the final layer (``X`` itself if there are no layers). ``A_values``
        and ``Z_values`` list each layer's activation and pre-activation in
        layer order.
    """
    activations = []
    pre_activations = []

    signal = X
    for layer in network.layers:
        # The output of one layer is the input of the next.
        signal, weighted_input = single_layer_forward_propagation(
            signal, layer.weights, layer.bias, layer.activation
        )
        activations.append(signal)
        pre_activations.append(weighted_input)

    return signal, activations, pre_activations
|
|
|
|
def train(X, Y, network: NeuralNetwork, epochs, learning_rate, verbose=False, callback=None):
    """Run the (currently forward-only) training loop.

    NOTE(review): this is still a stub. Each epoch performs a forward pass,
    but no cost is recorded, no gradients are computed and no parameters are
    updated. ``learning_rate`` and ``callback`` are accepted but not used yet,
    and both returned histories are always empty.

    Args:
        X: Training inputs, passed unchanged to ``full_forward_propagation``.
        Y: Training targets (only printed for now).
        network: Network to run the forward pass on.
        epochs: Number of forward passes to perform.
        learning_rate: Reserved for the parameter update step.
        verbose: When true, print ``X`` and ``Y`` before the loop starts.
        callback: Reserved for a per-epoch hook.

    Returns:
        A tuple ``(cost_history, accuracy_history)`` of two lists.
    """
    # These prints used to run unconditionally; they are debugging output and
    # are now emitted only when the caller opts in.
    if verbose:
        print(X)
        print(Y)

    cost_history = []
    accuracy_history = []

    for _ in range(epochs):
        # TODO: compute the cost, back-propagate and update the parameters.
        # The three values below are the inputs that step will need.
        Y_hat, A_values, Z_values = full_forward_propagation(X, network)

    return cost_history, accuracy_history
|
|
|
|
def main(architecture):
    """Build a network from ``architecture`` and run ``train`` on a toy dataset.

    The dataset is the four combinations of two binary inputs, labelled with
    their logical OR. Training runs for 1000 epochs with learning rate 0.1.

    Args:
        architecture: Layer descriptions forwarded to ``init_network``.
    """
    network = init_network(architecture)

    # One sample per row here; transposed below so each column is one example.
    samples = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    labels = np.array([0, 1, 1, 1])

    features = samples.T
    # Row vector of shape (1, n_samples), matching the column-per-example layout.
    targets = labels.reshape(1, labels.shape[0])

    cost_history, accuracy_history = train(features, targets, network, 1000, 0.1)
|
|
|
|
|
|
|
|
# Run the demo only when executed as a script, so that importing this module
# does not start a training run as a side effect.
if __name__ == "__main__":
    main(
        architecture=[
            # The first entry only gives the input width; it has no activation.
            {"size": 2},
            {"size": 25, "activation": "relu"},
            {"size": 50, "activation": "relu"},
            {"size": 50, "activation": "relu"},
            {"size": 25, "activation": "relu"},
            {"size": 1, "activation": "sigmoid"},
        ]
    )
|