Clipping and Sampling for Recurrent Neural Networks

Introduction

In this blog post, I'll walk you through two simple steps used when training character-level recurrent neural networks: gradient clipping and sampling.
Gradient clipping keeps your gradients from exploding during backpropagation through time, which makes optimizing the cost function much more stable. We'll implement a straightforward clipping method that clamps every gradient component to the interval [-N, N] for an arbitrary N.

Clipping

import numpy as np

def clip(gradients, maxValue):
    """Clip every gradient in the dictionary to the range [-maxValue, maxValue]."""
    # Retrieve the individual gradients from the dictionary.
    dWaa, dWax, dWya, db, dby = gradients['dWaa'], gradients['dWax'], gradients['dWya'], gradients['db'], gradients['dby']

    # Clip in place (out=gradient) so each array is modified directly.
    for gradient in [dWax, dWaa, dWya, db, dby]:
        np.clip(gradient, -maxValue, maxValue, out=gradient)

    # Repackage the clipped gradients into a dictionary.
    gradients = {"dWaa": dWaa, "dWax": dWax, "dWya": dWya, "db": db, "dby": dby}

    return gradients
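
To sanity-check the function, you can build a dictionary of random gradients and verify that every entry ends up inside the clipping range. This is a minimal sketch; the shapes below are purely illustrative and don't correspond to any particular model:

np.random.seed(1)
gradients = {"dWaa": np.random.randn(5, 5) * 10,
             "dWax": np.random.randn(5, 3) * 10,
             "dWya": np.random.randn(2, 5) * 10,
             "db":   np.random.randn(5, 1) * 10,
             "dby":  np.random.randn(2, 1) * 10}

clipped = clip(gradients, maxValue=10)
print(clipped["dWaa"].max())   # no value exceeds 10
print(clipped["dWaa"].min())   # no value falls below -10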

Sampling
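
The sample function below calls a softmax helper that isn't defined in this post. Here is a minimal, numerically stable version (my own sketch, not part of the original code) so the snippet runs as-is:

def softmax(z):
    # Subtract the max before exponentiating for numerical stability.
    e_z = np.exp(z - np.max(z))
    return e_z / e_z.sum(axis=0)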

def sample(parameters, char_to_ix, seed):
    """Sample a sequence of character indices from the trained RNN."""
    # Retrieve the parameters and the relevant shapes.
    Waa, Wax, Wya, by, b = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['b']
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]

    # The first input is a zero vector, and the initial hidden state is all zeros.
    x = np.zeros((vocab_size, 1))
    a_prev = np.zeros((n_a, 1))

    # List that will hold the indices of the sampled characters.
    indices = []

    # idx is the index of the character sampled at the current step.
    idx = -1

    # Loop until a newline is sampled or 50 characters have been generated.
    counter = 0
    newline_character = char_to_ix['\n']

    np.random.seed(seed)  # make the sampled sequence reproducible

    while (idx != newline_character and counter != 50):

        # Forward-propagate one step: hidden state, logits, and softmax probabilities.
        a = np.tanh(np.dot(Wax, x) + np.dot(Waa, a_prev) + b)
        z = np.dot(Wya, a) + by
        y = softmax(z)

        # Sample the index of the next character from the distribution y.
        idx = np.random.choice(list(range(vocab_size)), p=y.ravel())
        indices.append(idx)

        # The sampled character becomes the one-hot input for the next time step.
        x = np.zeros((vocab_size, 1))
        x[idx] = 1

        a_prev = a
        counter += 1

    # If we hit the 50-character limit, terminate the sequence with a newline.
    if (counter == 50):
        indices.append(char_to_ix['\n'])

    return indices
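
As a quick end-to-end check, you can call sample with randomly initialized parameters and a toy vocabulary. None of this comes from a trained model; the vocabulary, shapes, and initialization below are purely illustrative, so the output will just be a random-looking string:

np.random.seed(2)
chars = ['\n', 'a', 'b', 'c']
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

n_a, vocab_size = 8, len(chars)
parameters = {"Waa": np.random.randn(n_a, n_a) * 0.01,
              "Wax": np.random.randn(n_a, vocab_size) * 0.01,
              "Wya": np.random.randn(vocab_size, n_a) * 0.01,
              "b":   np.zeros((n_a, 1)),
              "by":  np.zeros((vocab_size, 1))}

indices = sample(parameters, char_to_ix, seed=0)
print(''.join(ix_to_char[i] for i in indices))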
