In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Abstract class for layers

In [1]:
class Layer:
    """Common interface for every layer in the network.

    Subclasses override both propagation methods; the base versions only
    signal that no implementation exists.
    """

    def __init__(self):
        # Placeholders for the most recent forward-pass input and output.
        self.output = None
        self.input = None

    def forward_propagation(self, input):
        """Return the layer output Y for the given input X (abstract)."""
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Return dE/dX given dE/dY, updating parameters if any (abstract)."""
        raise NotImplementedError()

# Implement fully connected layer

### To refresh your understanding of neural networks, refer to this link:
### https://towardsdatascience.com/understanding-neural-networks-19020b758230


### For backpropagation explanation, refer to this link: 
### https://towardsdatascience.com/understanding-backpropagation-algorithm-7bb3aa2f95fd

In [None]:
#Fully connected layer, inherits from Layer
# This is where you add your code
class FCLayer(Layer):
    """Fully connected (dense) layer computing Y = X.W + b.

    input_size  = number of input neurons
    output_size = number of output neurons
    Both are fixed when the layer is constructed.
    """

    def __init__(self, input_size, output_size):
        # Initialise the input/output slots declared by the base class.
        super().__init__()
        # Weight matrix of shape (input_size, output_size) and bias row of
        # shape (1, output_size), both drawn uniformly from [-0.5, 0.5).
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, data):
        """Return X.W + b for the given input and remember X for backprop.

        ``data`` must have ``input_size`` entries along its last axis so the
        dot product with the weight matrix is defined.
        """
        self.input = data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, error, lr):
        """Apply one gradient-descent step and return dE/dX.

        ``error`` is dE/dY for the most recent forward pass and ``lr`` is the
        learning rate. Requires ``forward_propagation`` to have been called
        first, because the stored input is needed for dE/dW.
        """
        # Treat 1-D vectors as single-row matrices so the transposes below
        # produce proper outer products.
        error = np.atleast_2d(error)
        layer_input = np.atleast_2d(self.input)

        # dE/dX must use the weights from the forward pass, so compute it
        # before the weights are modified.
        input_error = np.dot(error, self.weights.T)
        weights_error = np.dot(layer_input.T, error)   # dE/dW
        # dE/dB: sum over rows so the result always matches the (1, output_size)
        # bias; for a single-row error this is the error itself.
        bias_error = np.sum(error, axis=0, keepdims=True)

        self.weights -= lr * weights_error
        self.bias -= lr * bias_error
        return input_error


# The activation layer is already implemented

In [None]:
# The ActivationLayer class is implemented
class ActivationLayer(Layer):
    """Layer that applies an element-wise activation; it has no trainable parameters."""

    def __init__(self, activation, activation_derivative):
        # Callables: the activation itself and its derivative.
        self.activation_derivative = activation_derivative
        self.activation = activation

    def forward_propagation(self, data):
        """Store the input, apply the activation to it and return the result."""
        self.input = data
        activated = self.activation(data)
        self.output = activated
        return activated

    def backward_propagation(self, output_error, learning_rate):
        """Return dE/dX = activation'(input) * dE/dY.

        ``learning_rate`` is accepted for interface compatibility but unused,
        since there is nothing to learn in this layer.
        """
        local_gradient = self.activation_derivative(self.input)
        return local_gradient * output_error

In [None]:
# activation function and its derivative
def tanh(x):
    """Hyperbolic tangent activation, applied element-wise via numpy."""
    activated = np.tanh(x)
    return activated

def tanh_derivative(x):
    """Derivative of tanh evaluated at x: 1 - tanh(x)^2."""
    squared = np.tanh(x) ** 2
    return 1 - squared

In [None]:
# loss function and its derivative
def mse(y_true, y_pred):
    """Mean squared error between the target and the prediction."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_derivative(y_true, y_pred):
    """Gradient of the mean squared error with respect to the prediction."""
    residual = y_pred - y_true
    return 2 * residual / y_true.size

# Implement the Network methods predict() and fit()

In [None]:
class Network:
    """Sequential container that chains layers and trains them sample by sample.

    A layer is any object exposing ``forward_propagation(data)`` and
    ``backward_propagation(output_error, learning_rate)``. Layers are applied
    in the order they were added.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_derivative = None

    # add layer to network
    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_derivative):
        """Set the loss and its derivative; both are called as ``f(y_true, y_pred)``."""
        self.loss = loss
        self.loss_derivative = loss_derivative

    def _forward(self, sample):
        """Feed one sample through every layer in order and return the final output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    # predict output for given input
    def predict(self, data):
        """Return a list with the network output for each sample in ``data``.

        The first dimension of ``data`` indexes the samples.
        """
        return [self._forward(sample) for sample in data]

    # train the network
    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train with per-sample gradient descent and print the mean loss per epoch.

        For every sample the prediction is computed, the loss is accumulated,
        and the loss gradient is propagated backwards through the layers so
        each one can update its parameters. ``use()`` must have been called
        beforehand. An empty ``x_train`` with ``epochs > 0`` raises
        ZeroDivisionError when the epoch loss is averaged.
        """
        # sample dimension first
        samples = len(x_train)

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = self._forward(x_train[j])

                # compute loss (for display purposes only)
                err += self.loss(y_train[j], output)

                # backward propagation: each layer turns dE/dY into dE/dX,
                # which is dE/dY for the layer before it, hence reverse order.
                error = self.loss_derivative(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

# The data creation and network training code is given.

## You are encouraged to add layers and experiment

In [None]:
# Minimal smoke test: the XOR truth table, small enough to train in seconds.
# The network is evaluated on the same four samples it was trained on, so this
# only checks that the implementation learns at all. It is not a real test.
# training data: 4 samples, each a (1, 2) row vector, with (1, 1) targets
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# network: 2 inputs -> 3 hidden units -> 1 output, tanh after each dense layer
net = Network()
net.add(FCLayer(2, 3))
net.add(ActivationLayer(tanh, tanh_derivative))
net.add(FCLayer(3, 1))
net.add(ActivationLayer(tanh, tanh_derivative))

# train with mean squared error; fit() prints the average error for each epoch
net.use(mse, mse_derivative)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

# test: predict() returns one output per input sample
out = net.predict(x_train)
print(out)

In [None]:
from keras.datasets import mnist
# `keras.utils.np_utils` is not available in current Keras releases;
# `to_categorical` is exported directly from `keras.utils`.
from keras.utils import to_categorical

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape and normalize input data: each image becomes a (1, 784) row vector
# with pixel values scaled to [0, 1]
x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
x_test = x_test.astype('float32')
x_test /= 255
y_test = to_categorical(y_test)

# Network
net = Network()
net.add(FCLayer(28*28, 100))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_derivative))
net.add(FCLayer(100, 50))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
net.add(ActivationLayer(tanh, tanh_derivative))
net.add(FCLayer(50, 10))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_derivative))

# train on 1000 samples
# as we did not implement mini-batch GD, the weights are updated after every
# single sample, so training on all 60000 samples would be very slow
net.use(mse, mse_derivative)
net.fit(x_train[0:1000], y_train[0:1000], epochs=200, learning_rate=0.1)

# test on 5 samples


In [None]:
# Show the raw network outputs for the first five test digits next to their
# one-hot encoded labels.
out = net.predict(x_test[:5])
print("\n")
print("predicted values : ")
print(out)
print("true values : ")
print(y_test[:5])