import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# NLTK resources used below: the English stopword list, the punkt_tab
# tokenizer models for word_tokenize, and WordNet for the lemmatizer.
nltk.download("stopwords")
nltk.download("punkt_tab")
nltk.download("wordnet")


def fr(r):
    """Normalize a review: lowercase, strip punctuation, drop English
    stopwords, and lemmatize the remaining tokens as verbs."""
    r = r.lower()
    # \w+ keeps only alphanumeric/underscore runs, so punctuation, quotes,
    # and newlines are already removed at this point.
    r = " ".join(re.findall(r"\w+", r))
    stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()
    return " ".join(
        lemmatizer.lemmatize(word, pos="v")
        for word in word_tokenize(r)
        if word not in stop_words
    )
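

# A minimal usage sketch. The sample sentence and the expected output shown
# in the comment are illustrative assumptions, not part of the original code.
if __name__ == "__main__":
    sample = "The movies were running longer than I expected."
    print(fr(sample))
    # With NLTK's English stopword list and verb-mode lemmatization, this
    # should print something like: "movies run longer expect"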