import numpy as np
import tensorflow.keras.utils as ku
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
For this model, I've used the sonnets.txt file from Laurence Moroney's blog. You can download it as well by using the code below:
wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \
    -O /tmp/sonnets.txt

data = open('/tmp/sonnets.txt').read()
corpus = data.lower().split("\n")
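Before tokenizing, it's worth a quick sanity check that the file loaded correctly. The exact numbers depend on your copy of sonnets.txt, so treat the output as illustrative:

print(len(corpus))   # number of lines in the corpus
print(corpus[:2])    # the first two lowercased lines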
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1
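If you're curious about the vocabulary the tokenizer has built, you can peek at word_index. Keras assigns the lowest indices to the most frequent words, and index 0 is reserved for padding, which is why we add 1 above. The exact entries depend on your corpus:

print(total_words)                              # vocabulary size, including the padding index
print(list(tokenizer.word_index.items())[:5])   # the five most frequent words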
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)
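To make the n-gram idea concrete, here is a toy run on a single line (the opening of Sonnet 18, so its words should already be in the vocabulary): every line of text yields each of its prefixes of length two or more, so the model always learns to predict the next word from everything before it.

demo = tokenizer.texts_to_sequences(["shall i compare thee"])[0]
for i in range(1, len(demo)):
    print(demo[:i+1])  # prints [a, b], then [a, b, c], then [a, b, c, d]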
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
predictors, label = input_sequences[:, :-1], input_sequences[:, -1]
label = ku.to_categorical(label, num_classes=total_words)
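Putting the padding and the split together: a padded row like [0, 0, a, b, c] becomes the predictor [0, 0, a, b] with label c, which is then one-hot encoded over the vocabulary. A quick shape check (your exact first dimension will differ):

print(input_sequences.shape)  # (num_sequences, max_sequence_len)
print(predictors.shape)       # (num_sequences, max_sequence_len - 1)
print(label.shape)            # (num_sequences, total_words)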
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(Dropout(0.2))
model.add(LSTM(100))
# Dense units must be an integer, so use floor division here
model.add(Dense(total_words // 2, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
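As a sanity check on the architecture, the network maps a window of max_sequence_len - 1 token ids to a probability distribution over the whole vocabulary. The all-zeros batch below is just a dummy input for checking the output shape:

dummy_input = np.zeros((1, max_sequence_len - 1), dtype=int)
print(model.predict(dummy_input).shape)  # (1, total_words)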
history = model.fit(predictors, label, epochs=100, verbose=1)

import matplotlib.pyplot as plt

acc = history.history['accuracy']
loss = history.history['loss']
epochs = range(len(acc))

plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.title('Training accuracy')
plt.legend()

plt.figure()
plt.plot(epochs, loss, 'b', label='Training loss')
plt.title('Training loss')
plt.legend()

plt.show()
seed_text = "Help me Saurav Maheshkar, you're my only hope" next_words = 100 for _ in range(next_words): token_list = tokenizer.texts_to_sequences([seed_text])[0] token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre') predicted = model.predict_classes(token_list, verbose=0) output_word = "" for word, index in tokenizer.word_index.items(): if index == predicted: output_word = word break seed_text += " " + output_word print(seed_text)
If you do end up using the code, kindly share your poem in the comments section.
Saurav Maheshkar