# Load the raw training data: one review per row in reviews.txt,
# one matching sentiment label per row in labels.txt (no header rows).
reviews, labels = (
    pd.read_csv(path, header=None)
    for path in ('reviews.txt', 'labels.txt')
)
# Count how often every word appears across all reviews.
# Counter over a single chained generator replaces the slow
# iterrows()+update loop with one C-speed pass; the resulting
# counts are identical (split(' ') keeps empty tokens, as before).
from collections import Counter
from itertools import chain

total_counts = Counter(
    chain.from_iterable(text.split(' ') for text in reviews[0])
)
print("Total words in data set: ", len(total_counts))
# Output: Total words in data set: 74074
# Rank every word by frequency (most frequent first) and keep the
# top 10,000 as the model vocabulary; peek at the first 60 entries.
ranked_words = sorted(total_counts, key=total_counts.get, reverse=True)
vocab = ranked_words[:10000]
print(vocab[:60])
# Map each vocabulary word to its rank position (0 = most frequent),
# used later as the word's column index in the bag-of-words vectors.
word2idx = dict(zip(vocab, range(len(vocab))))
def text_to_vector(text):
    """Convert a text into a bag-of-words count vector over `vocab`.

    The text is split on single spaces; each in-vocabulary word
    increments its column, and out-of-vocabulary words are ignored.
    Returns a 1-D numpy int array of length len(vocab).
    """
    word_vector = np.zeros(len(vocab), dtype=np.int_)
    for word in text.split(' '):
        idx = word2idx.get(word)
        if idx is not None:          # skip words outside the vocabulary
            word_vector[idx] += 1
    # word_vector is already an ndarray; the original's extra
    # np.array(...) wrap made a needless copy.
    return word_vector

# Vectorize every review into one row of a (num_reviews, vocab_size) matrix.
word_vectors = np.zeros((len(reviews), len(vocab)), dtype=np.int_)
for ii, (_, text) in enumerate(reviews.iterrows()):
    word_vectors[ii] = text_to_vector(text[0])
# Split into train and test sets.
# Binary target: 1 where the label is 'positive', 0 otherwise.
Y = (labels == 'positive').astype(np.int_)
records = len(labels)

# Shuffle the row indices, then send the first 90% to TRAIN and the
# remainder to TEST. The original named this constant `test_fraction`,
# which was misleading — 0.9 is the fraction of rows used for training.
shuffle = np.arange(records)
np.random.shuffle(shuffle)
train_fraction = 0.9
split_at = int(records * train_fraction)
train_split, test_split = shuffle[:split_at], shuffle[split_at:]

# One-hot encode the two classes for the softmax/crossentropy head.
trainX, trainY = word_vectors[train_split, :], to_categorical(Y.values[train_split], 2)
testX, testY = word_vectors[test_split, :], to_categorical(Y.values[test_split], 2)
def build_model():
    """Build the feed-forward sentiment classifier.

    Two ReLU hidden layers (200 and 25 units) over a 10,000-dim
    bag-of-words input, softmax output over the two classes, trained
    with SGD (lr=0.1) on categorical cross-entropy.
    """
    # Reset all parameters and variables — leave this here.
    tf.reset_default_graph()

    # Input layer
    net = tflearn.input_data([None, 10000])
    # Hidden layers
    net = tflearn.fully_connected(net, 200, activation='ReLU')
    net = tflearn.fully_connected(net, 25, activation='ReLU')
    # Output layer + training config
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='sgd', learning_rate=0.1,
                             loss='categorical_crossentropy')
    return tflearn.DNN(net)
# Evaluate on the held-out split: take the class-0 probability column,
# threshold at 0.5, and compare against the one-hot labels' class-0
# column (a match in column 0 implies a match in column 1 as well).
class0_prob = np.array(model.predict(testX))[:, 0]
predictions = (class0_prob >= 0.5).astype(np.int_)
test_accuracy = np.mean(predictions == testY[:, 0], axis=0)
print("Test accuracy: ", test_accuracy)
# Output: Test accuracy: 0.8652
def test_sentence(sentence):
    """Predict and print the sentiment of a single sentence.

    Lower-cases the sentence, vectorizes it with `text_to_vector`,
    and reports P(positive) along with a Positive/Negative verdict
    (threshold 0.5).
    """
    vector = text_to_vector(sentence.lower())
    positive_prob = model.predict([vector])[0][1]
    verdict = 'Positive' if positive_prob > 0.5 else 'Negative'
    print('Sentence: {}'.format(sentence))
    print('P(positive) = {:.3f} :'.format(positive_prob), verdict)
# Exercise the classifier on a batch of sample sentences.
# The original repeated `sentence = ...; test_sentence(sentence)`
# twenty times; a single loop over a tuple prints identical output.
for sentence in (
    "this is a great day",
    "this is a disappointing experience",
    "this is a greatly disappointing experience",
    "I am not going to recommend this person",
    "I cannot believe how good she was",
    "I thought very highly of the person",
    "I thought he was not up to the mark",
    "The discussion was very enlightening",
    "He could have been more prepared",
    "His skills in programming were clearly evident",
    "Some improvement in communication would have helped",
    "She does not have a great deal of experience with databases",
    "His software skills were limited",
    "His background in scalable systems is promising",
    "It was unusual that he did not know about MapReduce",
    "It was not evident that he could perform well",
    "His coding skills were amazing",
    "He did not perform well on the coding test",
    "He demonstrated a good understanding of databases",
    "The logic he used to explain that problem was not correct",
):
    test_sentence(sentence)