Basic RNN

Introduction

In this blog, I’ll walk you through implementing the basic building blocks of a Recurrent Neural Network.
 

Importing Packages

import numpy as np

Basic Functions

def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The global maximum of ``x`` is subtracted before exponentiating so that
    large inputs do not overflow; the shift cancels out in the ratio.
    """
    peak = np.max(x)
    exps = np.exp(x - peak)
    column_totals = exps.sum(axis=0)
    return exps / column_totals

def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

Basic RNN Cells

def forward_rnn_cell(x_t, a_prev, parameters):
    """Compute one forward step of a basic RNN cell.

    Args:
        x_t: Input for the current time step.
        a_prev: Hidden state from the previous time step.
        parameters: Dict holding "Wax", "Waa", "Wya", "ba" and "by".

    Returns:
        A tuple ``(a_next, yt_pred, cache)`` where ``a_next`` is the new
        hidden state, ``yt_pred`` is the softmax output for this step and
        ``cache`` is ``(a_next, a_prev, x_t, parameters)``.
    """
    Wax, Waa, Wya, ba, by = (
        parameters[key] for key in ("Wax", "Waa", "Wya", "ba", "by")
    )

    # New hidden state: tanh(Wax . x_t + Waa . a_prev + ba).
    pre_activation = np.dot(Wax, x_t) + np.dot(Waa, a_prev) + ba
    a_next = np.tanh(pre_activation)

    # Output for this step: softmax(Wya . a_next + by).
    logits = np.dot(Wya, a_next) + by
    yt_pred = softmax(logits)

    # Values bundled for use by a backward pass.
    cache = (a_next, a_prev, x_t, parameters)
    return a_next, yt_pred, cache

def rnn_forward(x, a0, parameters):
    """Run the basic RNN cell over every time step of ``x``.

    Args:
        x: Input array of shape ``(n_x, m, T_x)``; the last axis is iterated
            as the time axis.
        a0: Initial hidden state fed to the first time step.
        parameters: Dict passed through to ``forward_rnn_cell``. Its "Wya"
            entry must be 2-D with shape ``(n_y, n_a)``.

    Returns:
        A tuple ``(a, y_pred, caches)``:
            a: Hidden states for every step, shape ``(n_a, m, T_x)``.
            y_pred: Predictions for every step, shape ``(n_y, m, T_x)``.
            caches: ``(list_of_per_step_caches, x)``.
    """
    _, m, T_x = x.shape
    n_y, n_a = parameters["Wya"].shape

    a = np.zeros((n_a, m, T_x))
    y_pred = np.zeros((n_y, m, T_x))
    step_caches = []

    a_next = a0
    for t in range(T_x):
        a_next, yt_pred, cache = forward_rnn_cell(x[:, :, t], a_next, parameters)
        a[:, :, t] = a_next
        y_pred[:, :, t] = yt_pred
        step_caches.append(cache)

    # Return the full list of per-step caches, not only the last step's
    # cache; this also keeps the function well-defined when T_x == 0.
    caches = (step_caches, x)

    return a, y_pred, caches

Basic LSTM Cells

def forward_lstm_cell(x_t, a_prev, c_prev, parameters):
    """Compute one forward step of an LSTM cell.

    Args:
        x_t: Input for the current time step, a 2-D array of shape (n_x, m).
        a_prev: Hidden state from the previous time step.
        c_prev: Cell (memory) state from the previous time step.
        parameters: Dict holding the gate weights and biases "Wf"/"bf",
            "Wi"/"bi", "Wc"/"bc", "Wo"/"bo" and the output layer "Wy"/"by".
            "Wy" must be 2-D with shape (n_y, n_a).

    Returns:
        A tuple ``(a_next, c_next, yt_pred, cache)`` where ``cache`` is
        ``(a_next, c_next, a_prev, c_prev, ft, it, cct, ot, x_t, parameters)``.
    """
    Wf, bf, Wi, bi, Wc, bc, Wo, bo, Wy, by = (
        parameters[key]
        for key in ("Wf", "bf", "Wi", "bi", "Wc", "bc", "Wo", "bo", "Wy", "by")
    )

    n_x, m = x_t.shape
    _, n_a = Wy.shape

    # Stack the previous hidden state on top of the current input so each
    # gate needs a single matrix product.
    stacked = np.zeros((n_a + n_x, m))
    stacked[:n_a, :] = a_prev
    stacked[n_a:, :] = x_t

    def affine(weight, bias):
        return np.dot(weight, stacked) + bias

    ft = sigmoid(affine(Wf, bf))    # forget gate
    it = sigmoid(affine(Wi, bi))    # input (update) gate
    cct = np.tanh(affine(Wc, bc))   # candidate cell state
    c_next = ft * c_prev + it * cct
    ot = sigmoid(affine(Wo, bo))    # output gate
    a_next = ot * np.tanh(c_next)

    yt_pred = softmax(np.dot(Wy, a_next) + by)

    # Values bundled for use by a backward pass.
    cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, x_t, parameters)
    return a_next, c_next, yt_pred, cache

def lstm_forward(x, a0, parameters):
    """Run the LSTM cell over every time step of ``x``.

    Args:
        x: Input array of shape ``(n_x, m, T_x)``; the last axis is iterated
            as the time axis.
        a0: Initial hidden state (an array); the initial cell state is a
            zero array of the same shape.
        parameters: Dict passed through to ``forward_lstm_cell``. Its "Wy"
            entry must be 2-D with shape ``(n_y, n_a)``.

    Returns:
        A tuple ``(a, y, c, caches)``:
            a: Hidden states for every step, shape ``(n_a, m, T_x)``.
            y: Predictions for every step, shape ``(n_y, m, T_x)``.
            c: Cell states for every step, shape ``(n_a, m, T_x)``.
            caches: ``(list_of_per_step_caches, x)``.
    """
    _, m, T_x = x.shape
    n_y, n_a = parameters["Wy"].shape

    a = np.zeros((n_a, m, T_x))
    c = np.zeros((n_a, m, T_x))
    y = np.zeros((n_y, m, T_x))
    step_caches = []

    a_next = a0
    c_next = np.zeros(a_next.shape)

    for t in range(T_x):
        # Call the cell under the name it is defined with in this file
        # (forward_lstm_cell); the previous name was undefined.
        a_next, c_next, yt, cache = forward_lstm_cell(
            x[:, :, t], a_next, c_next, parameters
        )
        a[:, :, t] = a_next
        y[:, :, t] = yt
        c[:, :, t] = c_next
        step_caches.append(cache)

    caches = (step_caches, x)

    return a, y, c, caches

Leave a Reply

Your email address will not be published. Required fields are marked *