neuro-lab7/analyze.py
2025-12-06 17:04:13 +02:00

69 lines
1.5 KiB
Python

from model import m
from preprocessor import fr
import pandas as pd
import numpy as np
import pickle
import time
from sys import exit
from tensorflow.keras.utils import pad_sequences as kps
from matplotlib import pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from tqdm import tqdm
# Register tqdm's `progress_apply` on pandas objects; the evaluation code
# below calls it on the review-text column.
tqdm.pandas()
def __prep_conf_matr(data):
    """Build a 2x2 confusion matrix from (predicted, real) label pairs.

    Args:
        data: Iterable of ``(predicted, real)`` pairs. Each label is a
            binary value given as a bool/int scalar or a size-1 array
            (e.g. ``array([True])``).

    Returns:
        A float numpy array of shape ``(2, 2)`` in which entry
        ``[predicted][real]`` is the number of pairs with those labels.
    """
    output_matrix = np.zeros([2, 2])
    for predicted, real in data:
        # .item() unwraps size-1 arrays explicitly; calling int() directly
        # on an array with ndim > 0 is deprecated since NumPy 1.25.
        row = int(np.asarray(predicted).item())
        col = int(np.asarray(real).item())
        output_matrix[row, col] += 1
    return output_matrix
def __plot_conf_matr(data):
    """Display the confusion matrix for (predicted, real) pairs as a heatmap.

    Args:
        data: Iterable of ``(predicted, real)`` label pairs; passed
            unchanged to ``__prep_conf_matr``.

    The matrix is drawn with ``matshow`` and every cell is annotated with
    its rounded count, then the figure is shown via ``plt.show()``.
    """
    matr = __prep_conf_matr(data)
    _, ax = plt.subplots()
    ax.matshow(matr, cmap=plt.cm.Blues)
    for row_idx, row in enumerate(matr):
        for col_idx, count in enumerate(row):
            # matshow draws matr[row][col] at x=col, y=row, and Axes.text
            # takes (x, y): the column index must come first, otherwise the
            # off-diagonal counts are printed on the transposed cells.
            ax.text(col_idx,
                    row_idx,
                    str(round(count)),
                    va="center",
                    ha="center")
    plt.show()
# Evaluate the saved model on the test CSV: plot the confusion matrix and
# print accuracy, precision, recall and F1.
m.load_weights("model2.keras")

# NOTE: pickle.load can execute arbitrary code; only load a tokenizer file
# produced by this project. The `with` block closes the file handle.
with open('tokenizer.pickle', 'rb') as tokenizer_file:
    tk = pickle.load(tokenizer_file)

# The file has no header row; 'e' is used as the label column and 'c' as the
# text column.
d = pd.read_csv("yelp_review_polarity_csv/test.csv", header=None, names=['e', 'c'])

# Shift labels down by one so they can be thresholded at 0.5 like the model
# output (presumably the raw labels are 1/2 - TODO confirm against the data).
e = d['e'] - 1

# progress_apply returns a new Series, so the result has to be kept; calling
# it without assignment leaves the raw text untouched.
# NOTE(review): this assumes the model was trained on text preprocessed with
# `fr` - confirm against the training script.
c = d['c'].progress_apply(fr)

s = tk.texts_to_sequences(c)
ps = kps(s, maxlen=100)
raw_predictions = m.predict(ps, batch_size=1024)

# One score per sample is expected; reshape raises if the model output does
# not contain exactly len(e) values.
p = np.asarray(raw_predictions).reshape(len(e)) >= 0.5
r = np.asarray(e) >= 0.5

# Pairs are (predicted, real), the order __plot_conf_matr consumes.
data = list(zip(p, r))
__plot_conf_matr(data)

# scikit-learn metrics take (y_true, y_pred). Passing the predictions first
# swaps the reported precision and recall.
print(f"Accuracy : {accuracy_score(r, p)}")
print(f"Precision : {precision_score(r, p, average='binary')}")
print(f"Recall : {recall_score(r, p, average='binary')}")
print(f"F1 Score : {f1_score(r, p, average='binary')}")