From 47a5a196e26913986874387052be0a24e5544ee7 Mon Sep 17 00:00:00 2001
From: hasslesstech
Date: Thu, 11 Dec 2025 12:57:18 +0200
Subject: [PATCH] incremental update

Add detect.py, a command-line script that takes the path of one audio
file as its only argument, loads the weights from 'model41-best.keras',
runs the model on that file, prints the decoded text and then prints a
spell-corrected version of it.

Split the body of encode_single_sample() out into
encode_single_sample_selectable_dir(), which reads the file at exactly
the path it is given instead of building the path from the `wavs`
directory. encode_single_sample() stays as a thin wrapper for existing
callers and returns whatever the new function returns.
---
 detect.py        | 28 ++++++++++++++++++++++++++++
 preprocessing.py |  6 +++++-
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 detect.py

diff --git a/detect.py b/detect.py
new file mode 100644
index 0000000..1b257dc
--- /dev/null
+++ b/detect.py
@@ -0,0 +1,28 @@
+from sys import argv, exit
+
+if len(argv) != 2:
+    exit(1)
+
+from model import *
+from preprocessing import *
+from cc import decode_batch_predictions
+
+import numpy as np
+from spellchecker import SpellChecker
+
+sc = SpellChecker()
+
+m = model(input_dim = fft_length // 2 + 1,
+          output_dim = char_to_num.vocabulary_size())
+
+m.load_weights('model41-best.keras')
+
+sg, _ = encode_single_sample_selectable_dir(argv[1], "")
+
+seq = m.predict(np.array([sg]))
+
+dc = decode_batch_predictions(seq)[0]
+print(f"Decode : {dc}")
+
+cdc = ' '.join([sc.correction(i) or i for i in dc.split()])
+print(f"Correct: {cdc}")
diff --git a/preprocessing.py b/preprocessing.py
index cf6401b..407ee52 100644
--- a/preprocessing.py
+++ b/preprocessing.py
@@ -17,7 +17,11 @@ fft_length = 384
 wavs = '/mnt/tmpfs1/LJSpeech-1.1/wavs/'
 
 def encode_single_sample(wav, label):
-    file = tf.io.read_file(wavs + wav + ".wav")
+    # for backward compatibility
+    return encode_single_sample_selectable_dir(wavs + wav + ".wav", label)
+
+def encode_single_sample_selectable_dir(wav, label):
+    file = tf.io.read_file(wav)
     audio, _ = tf.audio.decode_wav(file)
     audio = tf.squeeze(audio, axis = -1)
     audio = tf.cast(audio, tf.float32)