From 47a5a196e26913986874387052be0a24e5544ee7 Mon Sep 17 00:00:00 2001
From: hasslesstech <hasslesstech@tutanota.com>
Date: Thu, 11 Dec 2025 12:57:18 +0200
Subject: [PATCH] Add detect.py inference script; split out path-based sample encoder

---
 detect.py        | 28 ++++++++++++++++++++++++++++
 preprocessing.py |  6 +++++-
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 detect.py

diff --git a/detect.py b/detect.py
new file mode 100644
index 0000000..1b257dc
--- /dev/null
+++ b/detect.py
@@ -0,0 +1,28 @@
+from sys import argv, exit
+# Usage: detect.py <wav>; prints the decoded prediction and a spell-corrected copy.
+if len(argv) != 2:
+    exit(f"usage: {argv[0]} <path-to-wav>")  # message goes to stderr, status stays 1
+# Project imports come after the argument check (presumably to fail fast; confirm).
+from model import *
+from preprocessing import *
+from cc import decode_batch_predictions
+
+import numpy as np
+from spellchecker import SpellChecker
+
+sc = SpellChecker()
+
+m = model(input_dim = fft_length // 2 + 1,
+          output_dim = char_to_num.vocabulary_size())
+
+m.load_weights('model41-best.keras')
+# Empty label: only the first returned value is used for inference.
+sg, _ = encode_single_sample_selectable_dir(argv[1], "")
+# Wrap the single sample in a one-element batch for predict().
+seq = m.predict(np.array([sg]))
+
+dc = decode_batch_predictions(seq)[0]
+print(f"Decode : {dc}")
+# Keep the original word when the spell checker returns nothing for it.
+cdc = ' '.join([sc.correction(i) or i for i in dc.split()])
+print(f"Correct: {cdc}")
diff --git a/preprocessing.py b/preprocessing.py
index cf6401b..407ee52 100644
--- a/preprocessing.py
+++ b/preprocessing.py
@@ -17,7 +17,11 @@ fft_length = 384
 wavs = '/mnt/tmpfs1/LJSpeech-1.1/wavs/'
 
 def encode_single_sample(wav, label):
-    file = tf.io.read_file(wavs + wav + ".wav")
+    # Backward-compatible wrapper: `wav` is a bare file stem resolved under `wavs`.
+    return encode_single_sample_selectable_dir(wavs + wav + ".wav", label)
+
+def encode_single_sample_selectable_dir(wav, label):
+    file = tf.io.read_file(wav)
     audio, _ = tf.audio.decode_wav(file)
     audio = tf.squeeze(audio, axis = -1)
     audio = tf.cast(audio, tf.float32)