From 8665e3d4754345516c1147966f26db2bb4f85c64 Mon Sep 17 00:00:00 2001 From: hasslesstech Date: Sat, 6 Dec 2025 16:36:55 +0200 Subject: [PATCH] 4 --- detect.py | 35 +++++++++++++++++++++++++++++++++++ main.py | 8 ++++++++ preprocessor.py | 6 +++--- 3 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 detect.py diff --git a/detect.py b/detect.py new file mode 100644 index 0000000..c1c9ddc --- /dev/null +++ b/detect.py @@ -0,0 +1,35 @@ +from model import m +from preprocessor import frs +import pickle +import time + +from sys import exit + +from tensorflow.keras.utils import pad_sequences as kps + +m.load_weights("model2.keras") +tk = pickle.load(open('tokenizer.pickle', 'rb')) + +while True: + t = "" + try: + t = frs(input("Comment: ")) + except EOFError: + print("\nExiting") + exit(0) + + print(f"Processed: {t}") + + s = tk.texts_to_sequences([t]) + print(f"Sequence: {s[0]}") + + ps = kps(s, maxlen = 100) + + p = m.predict(ps) + + if p >= 0.75: + print(f"Result: positive ({p[0]})\n") + elif p <= 0.25: + print(f"Result: negative ({p[0]})\n") + else: + print(f"Result: unsure ({p[0]})\n") diff --git a/main.py b/main.py index a47aabb..5bb2c82 100644 --- a/main.py +++ b/main.py @@ -10,6 +10,9 @@ from tensorflow.keras.utils import pad_sequences as kps import numpy as np import pandas as pd +import pickle + + print("I") t = pd.read_csv("prepped_train.csv", header = None, @@ -22,6 +25,9 @@ r = t['r'].astype(str) tk = kT(num_words = 6000) tk.fit_on_texts(r) +with open('tokenizer.pickle', 'wb') as f: + pickle.dump(tk, f, protocol = pickle.HIGHEST_PROTOCOL) + print("F") s = tk.texts_to_sequences(r) @@ -50,6 +56,7 @@ m.compile(optimizer = ko.Lion(learning_rate = 0.0005), metrics = ['accuracy']) ''' +''' from model import m ckpt = kc.ModelCheckpoint('model2.keras', @@ -64,3 +71,4 @@ history = m.fit(ts, batch_size = 1024, validation_split = 0.1, callbacks = [ckpt]) +''' diff --git a/preprocessor.py b/preprocessor.py index e90251b..a431b8a 100644 --- a/preprocessor.py +++ b/preprocessor.py @@ -5,9 +5,9 @@ from nltk.tokenize import word_tokenize from nltk.stem import WordNetLemmatizer from spellchecker import SpellChecker as sc -nltk.download("stopwords") -nltk.download("punkt_tab") -nltk.download("wordnet") +#nltk.download("stopwords") +#nltk.download("punkt_tab") +#nltk.download("wordnet") def fr(r): r = r.lower()