diff --git a/Stemming.py b/Stemming.py index e0edf70..879a4fa 100644 --- a/Stemming.py +++ b/Stemming.py @@ -32,22 +32,17 @@ space, Professor Satish Dhawan, who succeeded him and Dr. Brahm Prakash, father of nuclear material. I was lucky to have worked with all three of them closely and consider this the great opportunity of my life. I see four milestones in my career""" - - + +#Tokenizing sentences (i.e. paragraph --> list of sentences) sentences = nltk.sent_tokenize(paragraph) +#creating an object stemmer = PorterStemmer() -# Stemming +print(f'Before stemming length of individual sentences:\n{[len(sentence) for sentence in sentences]}\n') +#Stemming for i in range(len(sentences)): - words = nltk.word_tokenize(sentences[i]) + words = nltk.word_tokenize(sentences[i].lower()) words = [stemmer.stem(word) for word in words if word not in set(stopwords.words('english'))] - sentences[i] = ' '.join(words) - - - - - - - - - \ No newline at end of file + sentences[i] = " ".join(words) +# print(sentences) +print(f'\nAfter stemming length of individual sentences:\n{[len(sentence) for sentence in sentences]}\n')