Clipping and Sampling for Recurrent Neural Networks


In this blog post, I’ll walk you through the simple steps of clipping and sampling for recurrent neural networks. 
Gradient clipping keeps your gradients from exploding, so training can converge toward a good minimum of the cost function. We’ll implement the simplest form — element-wise value clipping — which limits every entry of every gradient array to the range [-N, N] for an arbitrary threshold N.


def clip(gradients, maxValue):
    """Clip every gradient array element-wise to [-maxValue, maxValue].

    Arguments:
    gradients -- dict with keys "dWaa", "dWax", "dWya", "db", "dby",
                 each mapping to a numpy array. The arrays are modified
                 in place.
    maxValue -- positive scalar; values are limited to [-maxValue, maxValue].

    Returns:
    gradients -- dict with the same five keys, holding the clipped arrays.
    """
    names = ("dWaa", "dWax", "dWya", "db", "dby")
    # Clip each array in place (out=...) so no extra memory is allocated.
    for name in names:
        np.clip(gradients[name], -maxValue, maxValue, out=gradients[name])
    return {name: gradients[name] for name in names}


def sample(parameters, char_to_ix, seed):
    """Sample a sequence of character indices from a trained RNN.

    Starting from a zero input and zero hidden state, repeatedly runs one
    RNN step, samples the next character from the softmax output
    distribution, and feeds it back in one-hot form. Sampling stops when a
    newline character is drawn or after 50 characters.

    Arguments:
    parameters -- dict with keys "Waa", "Wax", "Wya", "by", "b" holding
                  the RNN weight/bias numpy arrays.
    char_to_ix -- dict mapping each character to its vocabulary index;
                  must contain '\n'.
    seed -- integer used to reseed the RNG at each step (with the step
            counter added) so results are reproducible.

    Returns:
    indices -- list of sampled character indices, ending with the index
               of '\n'.
    """
    Waa, Wax, Wya, by, b = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['b']
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]

    # Step 1: zero one-hot input and zero previous hidden state.
    x = np.zeros((vocab_size, 1))
    a_prev = np.zeros((n_a, 1))

    indices = []
    idx = -1  # sentinel so the loop body runs at least once
    counter = 0
    newline_character = char_to_ix['\n']

    while idx != newline_character and counter != 50:
        # Step 2: forward one RNN step.
        # (The published snippet lost the np.dot calls to text extraction;
        # restored here.)
        a = np.tanh(np.dot(Wax, x) + np.dot(Waa, a_prev) + b)
        z = np.dot(Wya, a) + by
        y = softmax(z)

        # Step 3: sample an index from the output distribution y.
        # Reseeding with counter + seed makes each draw reproducible
        # (and actually uses the `seed` parameter).
        np.random.seed(counter + seed)
        idx = np.random.choice(list(range(vocab_size)), p=y.ravel())
        indices.append(idx)

        # Step 4: one-hot encode the sampled index as the next input.
        x = np.zeros((vocab_size, 1))
        x[idx] = 1
        a_prev = a
        counter += 1

    # If we hit the 50-character cap without drawing '\n', terminate the
    # sequence explicitly with the newline index.
    if counter == 50:
        indices.append(char_to_ix['\n'])

    return indices

Leave a Reply

Your email address will not be published. Required fields are marked *