# Recovered from git patch c36951220c5c25b1e8c75953e6489c10f04f4b3c
# ("[PATCH] 5", hasslesstech, Sat, 6 Dec 2025 17:04:13 +0200), which adds
# analyze.py as a new file.
"""Evaluate the model from ``model`` on ``yelp_review_polarity_csv/test.csv``.

The script loads weights from ``model2.keras`` and a tokenizer from
``tokenizer.pickle``, runs the model over the test texts, shows a 2x2
confusion matrix and prints accuracy, precision, recall and F1 score.
"""
import pickle
import time

from sys import exit

import numpy as np
import pandas as pd

from matplotlib import pyplot as plt

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

from tensorflow.keras.utils import pad_sequences as kps

from tqdm import tqdm

from model import m
from preprocessor import fr

# Registers Series.progress_apply, used below.
tqdm.pandas()


def __prep_conf_matr(data):
    """Count (predicted, actual) pairs into a 2x2 matrix.

    Args:
        data: iterable of ``(p, r)`` pairs; each element is converted with
            ``int()`` and must therefore evaluate to 0 or 1.

    Returns:
        A 2x2 float array where entry ``[p][r]`` is the number of pairs with
        predicted class ``p`` and actual class ``r``.
    """
    output_matrix = np.zeros([2, 2])

    for p, r in data:
        output_matrix[int(p), int(r)] += 1

    return output_matrix


def __plot_conf_matr(data):
    """Draw the confusion matrix for ``data`` and block in ``plt.show()``.

    Args:
        data: iterable of ``(predicted, actual)`` pairs, forwarded to
            ``__prep_conf_matr``.
    """
    matr = __prep_conf_matr(data)

    _, ax = plt.subplots()

    ax.matshow(matr, cmap=plt.cm.Blues)

    for row, counts in enumerate(matr):
        for col, count in enumerate(counts):
            # Axes.text takes (x, y): x is the column and y is the row of the
            # cell drawn by matshow, so the indices must be passed as
            # (col, row) or the off-diagonal counts end up swapped.
            ax.text(col,
                    row,
                    str(round(count)),
                    va="center",
                    ha="center")

    # Rows are indexed by the first pair element, columns by the second.
    ax.set_xlabel("actual")
    ax.set_ylabel("predicted")

    plt.show()


m.load_weights("model2.keras")

# NOTE: unpickling can execute arbitrary code; only load a tokenizer file that
# was produced locally. The context manager closes the handle afterwards.
with open("tokenizer.pickle", "rb") as tokenizer_file:
    tk = pickle.load(tokenizer_file)

test_frame = pd.read_csv("yelp_review_polarity_csv/test.csv",
                         header=None,
                         names=["e", "c"])

# Shift labels down by one; presumably the raw labels are 1/2 so that they
# become 0/1 -- TODO confirm against the dataset.
labels = test_frame["e"] - 1

# progress_apply returns a new Series; the result has to be kept, otherwise
# the preprocessing is silently dropped.
# NOTE(review): assumes fr returns the cleaned text and that the same
# preprocessing was applied at training time -- confirm.
texts = test_frame["c"].progress_apply(fr)

sequences = tk.texts_to_sequences(texts)
# NOTE(review): maxlen presumably has to match the value used for training.
padded = kps(sequences, maxlen=100)

# Flatten so that one score per sample is left even when the model emits a
# trailing axis of size one.
scores = np.asarray(m.predict(padded, batch_size=1024)).ravel()

predicted = scores >= 0.5
actual = labels.to_numpy() >= 0.5

__plot_conf_matr(zip(predicted, actual))

# scikit-learn metrics take (y_true, y_pred) in that order; swapping them
# exchanges precision and recall.
print(f"Accuracy : {accuracy_score(actual, predicted)}")
print(f"Precision : {precision_score(actual, predicted, average='binary')}")
print(f"Recall : {recall_score(actual, predicted, average='binary')}")
print(f"F1 Score : {f1_score(actual, predicted, average='binary')}")