"""Record stereo audio from the ESP32 FIFO, band-pass it live, then denoise it.

Pipeline:
  1. Read about two seconds from the FIFO as a background-noise sample.
  2. Stream the FIFO into RAW_FILE and, band-pass filtered, into
     BANDPASS_FILE until interrupted with Ctrl-C.
  3. Run noisereduce over the raw recording and save NOISE_REDUCED_FILE.
"""
import time
import wave

import numpy as np
from scipy.signal import butter, lfilter

FIFO_PATH = "/tmp/esp32_audio"
RAW_FILE = "recorded_audio.wav"
BANDPASS_FILE = "recorded_audio_bandpass.wav"
NOISE_REDUCED_FILE = "recorded_audio_noise_reduced.wav"

SAMPLE_RATE = 16000
CHANNELS = 2
BYTES_PER_SAMPLE = 2
FRAME_BYTES = CHANNELS * BYTES_PER_SAMPLE  # one sample for every channel
NOISE_SECONDS = 2
READ_SIZE = 8192  # bytes per FIFO read; a whole number of frames
EOF_POLL_SECONDS = 0.01  # pause between reads while the FIFO yields nothing


# --- DSP helper: gentler band-pass filter ---
def butter_bandpass(lowcut, highcut, fs, order=2):
    """Return Butterworth band-pass coefficients ``(b, a)``.

    ``lowcut`` and ``highcut`` are in the same unit as the sample rate ``fs``
    and are normalised by the Nyquist frequency before the design call.
    """
    nyq = 0.5 * fs
    return butter(order, [lowcut / nyq, highcut / nyq], btype="band")


b, a = butter_bandpass(80, 7000, SAMPLE_RATE, order=2)
# One filter state per channel, carried across blocks so block edges are
# filtered as part of one continuous signal.
zi = [np.zeros(max(len(a), len(b)) - 1) for _ in range(CHANNELS)]


def bandpass_filter(data, channel):
    """Filter one block of ``channel`` and remember the filter state."""
    filtered, zi[channel] = lfilter(b, a, data, zi=zi[channel])
    return filtered


def split_whole_frames(buffer):
    """Split ``buffer`` into ``(whole_frames, leftover)`` byte strings.

    ``whole_frames`` holds the longest prefix that is a multiple of
    FRAME_BYTES; ``leftover`` is the remaining tail (possibly empty).
    """
    usable = len(buffer) - len(buffer) % FRAME_BYTES
    return buffer[:usable], buffer[usable:]


def to_int16(samples):
    """Clip ``samples`` to the int16 range, then cast.

    Casting without clipping lets filter overshoot wrap around to the
    opposite sign.
    """
    return np.clip(samples, -32768, 32767).astype(np.int16)


def bandpass_block(pcm):
    """Band-pass a block of interleaved int16 PCM and return it as bytes.

    ``pcm`` must be non-empty and hold a whole number of frames.
    """
    audio = np.frombuffer(pcm, dtype=np.int16).reshape(-1, CHANNELS)
    filtered = np.stack(
        [bandpass_filter(audio[:, ch], ch) for ch in range(CHANNELS)],
        axis=1,
    )
    return to_int16(filtered).tobytes()


def open_wav(path):
    """Open ``path`` as a WAV writer configured for the FIFO's format."""
    writer = wave.open(path, "wb")
    writer.setnchannels(CHANNELS)
    writer.setsampwidth(BYTES_PER_SAMPLE)
    writer.setframerate(SAMPLE_RATE)
    return writer


def capture_noise(fifo):
    """Read the noise sample and return it as an ``(n, CHANNELS)`` int16 array.

    A short read is accepted; a trailing partial frame is discarded.
    """
    wanted = SAMPLE_RATE * FRAME_BYTES * NOISE_SECONDS
    noise_bytes, _ = split_whole_frames(fifo.read(wanted))
    return np.frombuffer(noise_bytes, dtype=np.int16).reshape(-1, CHANNELS)


def record(fifo, raw_wav, bandpass_wav):
    """Copy ``fifo`` to both WAV writers until interrupted.

    Runs until an exception (normally KeyboardInterrupt) stops it.
    """
    pending = b""
    while True:
        data = fifo.read(READ_SIZE)
        if not data:
            # Nothing to read right now: keep waiting as before, but sleep
            # so the loop does not spin at full CPU.
            time.sleep(EOF_POLL_SECONDS)
            continue

        # A read may end mid-frame; hold the tail back for the next block.
        pcm, pending = split_whole_frames(pending + data)
        if not pcm:
            continue

        raw_wav.writeframes(pcm)
        bandpass_wav.writeframes(bandpass_block(pcm))


def main():
    """Run the capture, live band-pass and offline noise-reduction steps."""
    # Imported here so the DSP helpers above can be imported without these
    # packages, and before recording starts so a missing package fails fast.
    import noisereduce as nr
    import soundfile as sf

    # --- Steps 1 and 2 share one FIFO handle, so the stream is never left
    # without a reader between the noise sample and the recording. ---
    print("Sampling background noise...")
    with open(FIFO_PATH, "rb") as fifo:
        noise_np = capture_noise(fifo)

        with open_wav(RAW_FILE) as raw_wav, \
                open_wav(BANDPASS_FILE) as bandpass_wav:
            print(f"Recording from {FIFO_PATH} into {RAW_FILE} and {BANDPASS_FILE}...")
            try:
                record(fifo, raw_wav, bandpass_wav)
            except KeyboardInterrupt:
                print("\nStopping recording.")
    print(f"Saved {RAW_FILE} and {BANDPASS_FILE}")

    # --- Step 3: Post-process noise reduction ---
    print("Running noise reduction on raw file...")
    raw_audio, sr = sf.read(RAW_FILE, dtype="float32")
    if len(raw_audio) == 0 or len(noise_np) == 0:
        print("Nothing recorded; skipping noise reduction.")
        return

    # Downmix both to mono; the noise sample is scaled from int16 to match
    # the float32 data returned by sf.read.
    raw_mono = raw_audio.mean(axis=1)
    noise_mono = noise_np.astype(np.float32).mean(axis=1) / 32768.0

    print("Noise RMS:", np.sqrt(np.mean(noise_mono**2)))
    print("Raw RMS:", np.sqrt(np.mean(raw_mono**2)))

    # NOTE(review): noisereduce defaults to non-stationary mode, which
    # appears not to use y_noise -- confirm whether stationary=True was meant.
    filtered_mono = nr.reduce_noise(
        y=raw_mono,
        sr=sr,
        y_noise=noise_mono,
        prop_decrease=0.7,
    )

    # Mirror back to stereo
    filtered_stereo = np.column_stack((filtered_mono, filtered_mono))
    sf.write(NOISE_REDUCED_FILE, filtered_stereo, sr)
    print(f"Saved {NOISE_REDUCED_FILE}")


if __name__ == "__main__":
    main()
"""Record stereo audio from the ESP32 FIFO with a live band-pass and gain.

Streams the FIFO into RAW_FILE unchanged and into FILTERED_FILE after a
band-pass filter and a fixed gain, until interrupted with Ctrl-C.
"""
import time
import wave

import numpy as np
from scipy.signal import butter, lfilter

FIFO_PATH = "/tmp/esp32_audio"
RAW_FILE = "recorded_audio.wav"
FILTERED_FILE = "recorded_audio_filtered.wav"

SAMPLE_RATE = 16000
CHANNELS = 2
BYTES_PER_SAMPLE = 2
FRAME_BYTES = CHANNELS * BYTES_PER_SAMPLE  # one sample for every channel
READ_SIZE = 8000  # bytes per FIFO read: ~0.125s stereo
EOF_POLL_SECONDS = 0.01  # pause between reads while the FIFO yields nothing
GAIN = 6.0  # linear amplitude factor applied after filtering (about +15.6 dB)


# --- DSP helper: smoother band-pass filter ---
def butter_bandpass(lowcut, highcut, fs, order=4):
    """Return Butterworth band-pass coefficients ``(b, a)``.

    ``lowcut`` and ``highcut`` are in the same unit as the sample rate ``fs``
    and are normalised by the Nyquist frequency before the design call.
    """
    nyq = 0.5 * fs
    return butter(order, [lowcut / nyq, highcut / nyq], btype="band")


# Keep filter state across chunks (avoids clicks)
b, a = butter_bandpass(120, 6000, SAMPLE_RATE, order=2)
zi = [np.zeros(max(len(a), len(b)) - 1) for _ in range(CHANNELS)]


def bandpass_filter(data, channel):
    """Filter one block of ``channel`` and remember the filter state."""
    filtered, zi[channel] = lfilter(b, a, data, zi=zi[channel])
    return filtered


def split_whole_frames(buffer):
    """Split ``buffer`` into ``(whole_frames, leftover)`` byte strings.

    ``whole_frames`` holds the longest prefix that is a multiple of
    FRAME_BYTES; ``leftover`` is the remaining tail (possibly empty).
    """
    usable = len(buffer) - len(buffer) % FRAME_BYTES
    return buffer[:usable], buffer[usable:]


def apply_gain(samples, gain=GAIN):
    """Scale ``samples`` by ``gain`` and clip the result into int16."""
    return np.clip(samples * gain, -32768, 32767).astype(np.int16)


def filter_block(pcm):
    """Band-pass and amplify a block of interleaved int16 PCM.

    ``pcm`` must be non-empty and hold a whole number of frames. Returns the
    processed block as interleaved int16 bytes.
    """
    audio = np.frombuffer(pcm, dtype=np.int16).reshape(-1, CHANNELS)
    filtered = np.stack(
        [bandpass_filter(audio[:, ch], ch) for ch in range(CHANNELS)],
        axis=1,
    )
    return apply_gain(filtered).tobytes()


def open_wav(path):
    """Open ``path`` as a WAV writer configured for the FIFO's format."""
    writer = wave.open(path, "wb")
    writer.setnchannels(CHANNELS)
    writer.setsampwidth(BYTES_PER_SAMPLE)
    writer.setframerate(SAMPLE_RATE)
    return writer


def record(fifo, raw_wav, filtered_wav):
    """Copy ``fifo`` to both WAV writers until interrupted.

    Runs until an exception (normally KeyboardInterrupt) stops it.
    """
    pending = b""
    while True:
        data = fifo.read(READ_SIZE)
        if not data:
            # Nothing to read right now: keep waiting as before, but sleep
            # so the loop does not spin at full CPU.
            time.sleep(EOF_POLL_SECONDS)
            continue

        # A read may end mid-frame; hold the tail back for the next block.
        pcm, pending = split_whole_frames(pending + data)
        if not pcm:
            continue

        raw_wav.writeframes(pcm)
        filtered_wav.writeframes(filter_block(pcm))


def main():
    """Record from the FIFO until Ctrl-C, closing both WAV files on exit."""
    with open_wav(RAW_FILE) as raw_wav, \
            open_wav(FILTERED_FILE) as filtered_wav, \
            open(FIFO_PATH, "rb") as fifo:
        print(f"Recording from {FIFO_PATH} into {RAW_FILE} and {FILTERED_FILE}...")
        try:
            record(fifo, raw_wav, filtered_wav)
        except KeyboardInterrupt:
            print("\nStopping recording.")
    print(f"Saved {RAW_FILE} and {FILTERED_FILE}")


if __name__ == "__main__":
    main()
"""Denoise the ESP32 audio stream with RNNoise.

Reads interleaved stereo int16 PCM at 16 kHz from the FIFO, downmixes it to
mono, upsamples to 48 kHz and high-pass filters it. The filtered signal goes
to RAW_FILE and RNNoise's output to DENOISED_FILE, both as mono 16-bit WAV.
"""
import ctypes
import time
import wave
from ctypes import POINTER, c_float, c_void_p
from ctypes.util import find_library

import numpy as np
from scipy.signal import butter, lfilter

FIFO_PATH = "/tmp/esp32_audio"
RAW_FILE = "raw_mono_48k.wav"
DENOISED_FILE = "denoised_mono_48k.wav"

IN_SR = 16000
TARGET_SR = 48000
CHANNELS_IN = 2
BYTES_PER_SAMPLE = 2
FRAME_SIZE = 480  # RNNoise frame size at 48kHz
IN_FRAME_16K = 160  # 160 samples @16kHz, upsampled x3, gives 480 @48kHz
FRAME_BYTES_IN = CHANNELS_IN * BYTES_PER_SAMPLE  # one stereo input frame
EOF_POLL_SECONDS = 0.01  # pause between reads while the FIFO yields nothing


# --- High-pass filter design ---
def highpass_filter(data, cutoff=100, fs=TARGET_SR, order=4):
    """High-pass ``data`` in one shot, starting from a zero filter state.

    Suitable for a complete signal only. Calling it once per streamed frame
    restarts the filter every time; use StreamingHighpass for that.
    """
    b, a = butter(order, cutoff / (0.5 * fs), btype="high", analog=False)
    return lfilter(b, a, data)


class StreamingHighpass:
    """Butterworth high-pass that keeps its state between blocks.

    The coefficients are designed once, and the state left by one block is
    the starting state of the next, so consecutive blocks are filtered as one
    continuous signal.
    """

    def __init__(self, cutoff=100, fs=TARGET_SR, order=4):
        self._b, self._a = butter(
            order, cutoff / (0.5 * fs), btype="high", analog=False
        )
        self._zi = np.zeros(max(len(self._a), len(self._b)) - 1)

    def process(self, data):
        """Filter the next block of the stream and return the result."""
        filtered, self._zi = lfilter(self._b, self._a, data, zi=self._zi)
        return filtered


# --- Simple linear upsample 16k -> 48k (factor 3) ---
def upsample3(x, following=None):
    """Upsample ``x`` by 3 with linear interpolation; returns float32.

    Each input sample is followed by two points placed 1/3 and 2/3 of the way
    to the next sample. ``following`` is the sample that comes after the end
    of ``x``; when omitted, the last sample of ``x`` is held instead.
    """
    x = np.asarray(x, dtype=np.float32)
    if x.size == 0:
        return np.empty(0, dtype=np.float32)

    tail = x[-1] if following is None else following
    nxt = np.append(x[1:], np.float32(tail))

    out = np.empty(x.size * 3, dtype=np.float32)
    out[0::3] = x
    out[1::3] = (2 * x + nxt) / 3.0
    out[2::3] = (x + 2 * nxt) / 3.0
    return out


def split_whole_frames(buffer):
    """Split ``buffer`` into ``(whole_frames, leftover)`` byte strings.

    ``whole_frames`` holds the longest prefix that is a multiple of
    FRAME_BYTES_IN; ``leftover`` is the remaining tail (possibly empty).
    """
    usable = len(buffer) - len(buffer) % FRAME_BYTES_IN
    return buffer[:usable], buffer[usable:]


def to_pcm16(samples):
    """Clip ``samples`` to the int16 range and return them as bytes."""
    return np.clip(samples, -32768, 32767).astype(np.int16).tobytes()


def open_mono_wav(path):
    """Open ``path`` as a mono 16-bit WAV writer at TARGET_SR."""
    writer = wave.open(path, "wb")
    writer.setnchannels(1)
    writer.setsampwidth(2)
    writer.setframerate(TARGET_SR)
    return writer


# --- Load RNNoise ---
def load_rnnoise():
    """Load librnnoise and declare the prototypes of the functions used.

    Raises:
        RuntimeError: if the shared library cannot be located.
    """
    libname = find_library("rnnoise")
    if not libname:
        raise RuntimeError("librnnoise not found. Run sudo ldconfig after install.")
    rn = ctypes.CDLL(libname)

    rn.rnnoise_create.argtypes = [c_void_p]  # takes RNNModel* (NULL for default)
    rn.rnnoise_create.restype = c_void_p
    rn.rnnoise_destroy.argtypes = [c_void_p]
    rn.rnnoise_process_frame.argtypes = [
        c_void_p,
        POINTER(c_float),
        POINTER(c_float),
    ]
    rn.rnnoise_process_frame.restype = c_float
    return rn


def stream(fifo, raw_wav, den_wav, rn, st):
    """Process ``fifo`` frame by frame until interrupted.

    Runs until an exception (normally KeyboardInterrupt) stops it.
    """
    hpf = StreamingHighpass(cutoff=100, fs=TARGET_SR)
    buf = np.empty((0,), dtype=np.int16)
    out48 = np.zeros(FRAME_SIZE, dtype=np.float32)
    pending = b""

    while True:
        data = fifo.read(IN_FRAME_16K * FRAME_BYTES_IN)
        if not data:
            # Nothing to read right now: keep waiting as before, but sleep
            # so the loop does not spin at full CPU.
            time.sleep(EOF_POLL_SECONDS)
            continue

        # A read may end mid-frame; hold the tail back for the next block.
        pcm, pending = split_whole_frames(pending + data)
        if not pcm:
            continue

        # Downmix stereo -> mono @16k
        stereo = np.frombuffer(pcm, dtype=np.int16).reshape(-1, CHANNELS_IN)
        buf = np.concatenate([buf, stereo.mean(axis=1).astype(np.int16)])

        # A frame is processed once the sample after it has arrived, so the
        # interpolation can run across the frame boundary.
        while len(buf) > IN_FRAME_16K:
            # Samples stay in the int16 value range: RNNoise expects floats
            # in the 16-bit PCM range, not in [-1, 1].
            frame16 = buf[:IN_FRAME_16K].astype(np.float32)
            lookahead = float(buf[IN_FRAME_16K])
            buf = buf[IN_FRAME_16K:]

            # Upsample to 48kHz (480 samples), then high-pass
            frame48 = upsample3(frame16, following=lookahead)
            frame48 = np.ascontiguousarray(
                hpf.process(frame48), dtype=np.float32
            )

            # --- Write raw upsampled mono (with HPF) ---
            raw_wav.writeframes(to_pcm16(frame48))

            # --- Denoise ---
            rn.rnnoise_process_frame(
                st,
                out48.ctypes.data_as(POINTER(c_float)),
                frame48.ctypes.data_as(POINTER(c_float)),
            )
            den_wav.writeframes(to_pcm16(out48))


def main():
    """Record and denoise until Ctrl-C, releasing every resource on exit."""
    rn = load_rnnoise()
    st = rn.rnnoise_create(None)  # NULL = default model
    if not st:
        raise RuntimeError("rnnoise_create failed")

    try:
        with open_mono_wav(RAW_FILE) as raw_wav, \
                open_mono_wav(DENOISED_FILE) as den_wav:
            print(f"Recording {FIFO_PATH} → {RAW_FILE}, {DENOISED_FILE}")
            with open(FIFO_PATH, "rb") as fifo:
                stream(fifo, raw_wav, den_wav, rn, st)
    except KeyboardInterrupt:
        pass
    finally:
        rn.rnnoise_destroy(st)
        print(f"Saved {RAW_FILE} and {DENOISED_FILE}")


if __name__ == "__main__":
    main()