30 lines
492 B
Python
30 lines
492 B
Python
import numpy as np
|
|
import pandas as pd
|
|
|
|
import nltk
|
|
from nltk.corpus import stopwords
|
|
from nltk.tokenize import word_tokenize
|
|
from nltk.stem import WordNetLemmatizer
|
|
|
|
from tqdm import tqdm
|
|
tqdm.pandas()
|
|
|
|
from preprocessor import fr
|
|
|
|
# Preprocessing script: load the raw Yelp review-polarity training CSV, shift
# the 'c' column down by one, run every entry of the 'r' column through `fr`
# (imported from the project's `preprocessor` module), and write the two
# resulting columns to "prepped_train.csv".

print("I")

# The file is read with header=None, so the first row is treated as data and
# the two columns are named explicitly.
t = pd.read_csv(
    "yelp_review_polarity_csv/train.csv",
    header=None,
    names=['c', 'r'],
)

print("R")

# NOTE(review): presumably 'c' holds 1-based class labels and this makes them
# 0-based -- confirm against the dataset description.
y = t['c'] - 1
r = t['r']

# progress_apply is the tqdm-wrapped Series.apply; it is only available
# because tqdm.pandas() has been called earlier in the file.
r = r.progress_apply(fr)

# Join the two Series side by side. The previous pd.DataFrame([y, r]).T
# treated each Series as a *row*, building a 2 x N frame (one column per
# review) before transposing it back, which is very slow and memory-hungry
# for a large N and leaves every column as object dtype. concat along axis=1
# produces the same 'c'/'r' columns on the same index directly.
o = pd.concat([y, r], axis=1)

# The row index is intentionally still written as the first (unnamed) CSV
# column so the output layout matches what the original script produced.
o.to_csv("prepped_train.csv")
|