neuro-lab7/prep.py

30 lines
492 B
Python
Raw Normal View History

2025-12-06 13:36:49 +02:00
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from tqdm import tqdm
# Register tqdm's pandas integration; this is what makes the
# `progress_apply` method used further down available.
tqdm.pandas()
from preprocessor import fr
# Preprocess the training split and save it.
#
# Steps: read the raw CSV, shift the 'c' column down by one, run every entry
# of the 'r' column through `fr` (imported from `preprocessor`), and write
# both columns to "prepped_train.csv".

print("I")  # progress marker printed before the CSV is read
# The file is read with header=None, so the column names are supplied here.
# NOTE(review): 'c' looks like the class label and 'r' the review text --
# confirm against the dataset description.
t = pd.read_csv(
    "yelp_review_polarity_csv/train.csv",
    header=None,
    names=['c', 'r'],
)
print("R")  # progress marker printed once the CSV is loaded

# Shift the label column down by one.
# NOTE(review): presumably maps labels 1/2 to 0/1 -- confirm.
y = t['c'] - 1

# Apply `fr` to every entry of 'r', showing a tqdm progress bar.
r = t['r'].progress_apply(fr)

# Concatenate column-wise so each column keeps its own dtype. The previous
# pd.DataFrame([y, r]).T built a 2 x N object-dtype frame and transposed it,
# which is slow and memory-hungry on large inputs. The resulting columns
# ('c', 'r') and the index are the same.
o = pd.concat([y, r], axis=1)
o.to_csv("prepped_train.csv")