neuro-lab7/prep.py
2025-12-06 13:50:19 +02:00

30 lines
492 B
Python

import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from tqdm import tqdm
tqdm.pandas()
from preprocessor import fr
# Preprocess the training split: load the raw CSV, shift the labels down by
# one, run every review through the project's `fr` function and write the
# label/text pairs to a new CSV.
print("I")  # marker printed before the (potentially slow) CSV load
raw = pd.read_csv(
    "yelp_review_polarity_csv/train.csv",
    header=None,  # first row is treated as data, so names are supplied here
    names=['c', 'r'],  # presumably 'c' = class label, 'r' = review text
)
print("R")  # marker printed once the CSV has been read

# Shift the label column down by one. NOTE(review): presumably the raw classes
# are 1/2 and this maps them to 0/1 -- confirm against the dataset.
labels = raw['c'] - 1

# Apply `fr` (imported from the project's `preprocessor` module) to each
# review. `progress_apply` is the tqdm-wrapped `apply`, registered by the
# `tqdm.pandas()` call in the import section.
reviews = raw['r'].progress_apply(fr)

# Join the two Series column-wise. Building a frame from a list of Series and
# transposing it would first create one column per review, which is slow and
# memory-hungry on large inputs and degrades both columns to object dtype.
# `concat` keeps the Series names ('c', 'r') as column names and preserves
# each column's dtype.
prepared = pd.concat([labels, reviews], axis=1)

# The row index is written as the first, unnamed CSV column (pandas default).
prepared.to_csv("prepped_train.csv")