filter_test_simple.py works well and boosts gain; filter_test.py applies excellent noise reduction, but not in real time

2025-11-17 02:57:48 +00:00 · 2025-11-17 02:57:48 +00:00 · c15b081b53
parent 575754a294
commit c15b081b53
14 changed files with 309 additions and 0 deletions
--- a/pycache/rnnoise.cpython-311.pyc
+++ b/pycache/rnnoise.cpython-311.pyc
--- a/denoised_audio.wav
+++ b/denoised_audio.wav
--- a/denoised_mono_48k.wav
+++ b/denoised_mono_48k.wav
--- a/filter_test.py
+++ b/filter_test.py
@ -0,0 +1,101 @@
 import wave
 import numpy as np
 import soundfile as sf
 import noisereduce as nr
 from scipy.signal import butter, lfilter
 # Path the script opens (twice) for reading the incoming PCM byte stream.
 # NOTE(review): presumably a named pipe fed by an ESP32 audio bridge - confirm.
 FIFO_PATH = "/tmp/esp32_audio"
 # Output WAV: the bytes read from FIFO_PATH, written without modification.
 RAW_FILE = "recorded_audio.wav"
 # Output WAV: the same stream after the per-channel band-pass filter.
 BANDPASS_FILE = "recorded_audio_bandpass.wav"
 # Output WAV: mono noise-reduced result, duplicated into two channels.
 NOISE_REDUCED_FILE = "recorded_audio_noise_reduced.wav"
 # Frame rate written into the WAV headers and used for the filter design.
 SAMPLE_RATE = 16000
 # Number of interleaved channels in the incoming stream.
 CHANNELS = 2
 # Bytes per sample; the stream is decoded as int16 elsewhere in the script.
 BYTES_PER_SAMPLE = 2
 # --- DSP helper: gentler band-pass filter ---
 def butter_bandpass(lowcut, highcut, fs, order=2):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower band edge, in the same unit as ``fs``.
        highcut: Upper band edge, in the same unit as ``fs``.
        fs: Sampling rate; both edges are normalised by ``fs / 2``.
        order: Order passed straight to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` coefficient pair produced by ``butter``.
    """
    nyquist = fs / 2.0
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype="band")
 # Design the band-pass once at start-up: 80-7000 pass band at SAMPLE_RATE, order 2.
 b, a = butter_bandpass(80, 7000, SAMPLE_RATE, order=2)
 # One zero-initialised lfilter state vector per channel; bandpass_filter()
 # overwrites the entry for a channel after every chunk it processes.
 zi = [np.zeros(max(len(a), len(b)) - 1) for _ in range(CHANNELS)]
 def bandpass_filter(data, channel):
    """Band-pass one chunk of samples for ``channel``, resuming from the previous chunk.

    Uses the module-level coefficients ``b``/``a`` and the per-channel state
    list ``zi``. The state entry for ``channel`` is replaced with the filter's
    final conditions so the next call continues where this one stopped.

    Args:
        data: 1-D sequence of samples for a single channel.
        channel: Index into ``zi`` selecting which state vector to use.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    # Item assignment mutates the shared list in place; ``zi`` itself is never rebound.
    output, final_state = lfilter(b, a, data, zi=zi[channel])
    zi[channel] = final_state
    return output
 # --- Step 1: Capture ~2s of noise (runtime only) ---
 import time  # script-local import: only used to back off while the FIFO has no data
 # Size in bytes of one interleaved sample frame (all channels).
 FRAME_BYTES = BYTES_PER_SAMPLE * CHANNELS
 print("Sampling background noise...")
 with open(FIFO_PATH, "rb") as f:
    noise_data = f.read(SAMPLE_RATE * FRAME_BYTES * 2)  # 2 seconds
 # A short read (stream ended early) may stop mid-frame; drop the partial
 # frame so the reshape below cannot raise.
 noise_data = noise_data[:len(noise_data) - len(noise_data) % FRAME_BYTES]
 noise_np = np.frombuffer(noise_data, dtype=np.int16).reshape(-1, CHANNELS)
 # NOTE(review): the FIFO is closed here and reopened for Step 2. If the
 # producer does not tolerate the reader going away, keep one handle open for
 # both steps instead - confirm against the writer side.
 # --- Configure WAV writers ---
 raw_wav = wave.open(RAW_FILE, "wb")
 raw_wav.setnchannels(CHANNELS)
 raw_wav.setsampwidth(BYTES_PER_SAMPLE)
 raw_wav.setframerate(SAMPLE_RATE)
 bandpass_wav = wave.open(BANDPASS_FILE, "wb")
 bandpass_wav.setnchannels(CHANNELS)
 bandpass_wav.setsampwidth(BYTES_PER_SAMPLE)
 bandpass_wav.setframerate(SAMPLE_RATE)
 # --- Step 2: Record raw + band-pass in parallel ---
 with open(FIFO_PATH, "rb") as f:
    print(f"Recording from {FIFO_PATH} into {RAW_FILE} and {BANDPASS_FILE}...")
    pending = b""  # bytes of an incomplete trailing frame, carried to the next read
    try:
        while True:
            data = f.read(8192)  # aligned block size
            if not data:
                # read() returned nothing: keep waiting as before, but sleep
                # briefly so the loop does not spin at 100% CPU.
                time.sleep(0.01)
                continue
            # Only ever process whole frames; keep any remainder for later.
            pending += data
            usable = len(pending) - len(pending) % FRAME_BYTES
            if not usable:
                continue
            chunk, pending = pending[:usable], pending[usable:]
            # --- Write raw ---
            raw_wav.writeframes(chunk)
            # --- Convert to numpy ---
            audio_np = np.frombuffer(chunk, dtype=np.int16).reshape(-1, CHANNELS)
            # --- Band-pass filter each channel (state persists across chunks) ---
            filtered_channels = [
                bandpass_filter(audio_np[:, ch], ch) for ch in range(CHANNELS)
            ]
            # Clip before the int16 cast: filter overshoot beyond the int16
            # range would otherwise wrap around and produce loud artefacts.
            filtered_np = np.clip(
                np.stack(filtered_channels, axis=1), -32768, 32767
            ).astype(np.int16)
            # --- Write band-pass filtered ---
            bandpass_wav.writeframes(filtered_np.tobytes())
    except KeyboardInterrupt:
        print("\nStopping recording.")
    finally:
        raw_wav.close()
        bandpass_wav.close()
        print(f"Saved {RAW_FILE} and {BANDPASS_FILE}")
 # --- Step 3: Post-process noise reduction ---
 print("Running noise reduction on raw file...")
 raw_audio, sr = sf.read(RAW_FILE, dtype="float32")  # ensure float32 in [-1,1]
 # Downmix both to mono
 raw_mono = raw_audio.mean(axis=1)
 noise_mono = noise_np.astype(np.float32).mean(axis=1) / 32768.0  # scale int16 -> [-1,1]
 print("Noise RMS:", np.sqrt(np.mean(noise_mono**2)))
 print("Raw RMS:", np.sqrt(np.mean(raw_mono**2)))
 # Apply noise reduction, using the Step 1 capture as the noise profile
 filtered_mono = nr.reduce_noise(y=raw_mono,
                                sr=sr,
                                y_noise=noise_mono,
                                prop_decrease=0.7)
 # Mirror back to stereo
 filtered_stereo = np.column_stack((filtered_mono, filtered_mono))
 sf.write(NOISE_REDUCED_FILE, filtered_stereo, sr)
 print(f"Saved {NOISE_REDUCED_FILE}")
--- a/filter_test_simple.py
+++ b/filter_test_simple.py
@ -0,0 +1,76 @@
 import wave
 import numpy as np
 from scipy.signal import butter, lfilter
 # Path the script opens for reading the incoming PCM byte stream.
 # NOTE(review): presumably a named pipe fed by an ESP32 audio bridge - confirm.
 FIFO_PATH = "/tmp/esp32_audio"
 # Output WAV: the bytes read from FIFO_PATH, written without modification.
 RAW_FILE = "recorded_audio.wav"
 # Output WAV: the stream after band-pass filtering and gain.
 FILTERED_FILE = "recorded_audio_filtered.wav"
 # Frame rate written into the WAV headers and used for the filter design.
 SAMPLE_RATE = 16000
 # Number of interleaved channels in the incoming stream.
 CHANNELS = 2
 # Bytes per sample; the stream is decoded as int16 elsewhere in the script.
 BYTES_PER_SAMPLE = 2
 # --- DSP helper: smoother band-pass filter ---
 def butter_bandpass(lowcut, highcut, fs, order=4):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower band edge, in the same unit as ``fs``.
        highcut: Upper band edge, in the same unit as ``fs``.
        fs: Sampling rate; both edges are normalised by ``fs / 2``.
        order: Order passed straight to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` coefficient pair produced by ``butter``.
    """
    nyquist = fs / 2.0
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype="band")
 # Keep filter state across chunks (avoids clicks)
 # Design the band-pass once at start-up: 120-6000 pass band at SAMPLE_RATE.
 # The explicit order=2 overrides butter_bandpass()'s default of 4.
 b, a = butter_bandpass(120, 6000, SAMPLE_RATE, order=2)
 # One zero-initialised lfilter state vector per channel; bandpass_filter()
 # overwrites the entry for a channel after every chunk it processes.
 zi = [np.zeros(max(len(a), len(b)) - 1) for _ in range(CHANNELS)]
 def bandpass_filter(data, channel):
    """Band-pass one chunk of samples for ``channel``, resuming from the previous chunk.

    Uses the module-level coefficients ``b``/``a`` and the per-channel state
    list ``zi``. The state entry for ``channel`` is replaced with the filter's
    final conditions so the next call continues where this one stopped.

    Args:
        data: 1-D sequence of samples for a single channel.
        channel: Index into ``zi`` selecting which state vector to use.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    # Item assignment mutates the shared list in place; ``zi`` itself is never rebound.
    output, final_state = lfilter(b, a, data, zi=zi[channel])
    zi[channel] = final_state
    return output
 # --- Configure WAV writers ---
 import time  # script-local import: only used to back off while the FIFO has no data
 raw_wav = wave.open(RAW_FILE, "wb")
 raw_wav.setnchannels(CHANNELS)
 raw_wav.setsampwidth(BYTES_PER_SAMPLE)
 raw_wav.setframerate(SAMPLE_RATE)
 filtered_wav = wave.open(FILTERED_FILE, "wb")
 filtered_wav.setnchannels(CHANNELS)
 filtered_wav.setsampwidth(BYTES_PER_SAMPLE)
 filtered_wav.setframerate(SAMPLE_RATE)
 # Linear amplitude gain applied after filtering: 6x amplitude (about +15.6 dB).
 GAIN = 6.0
 # Size in bytes of one interleaved sample frame (all channels).
 FRAME_BYTES = BYTES_PER_SAMPLE * CHANNELS
 with open(FIFO_PATH, "rb") as f:
    print(f"Recording from {FIFO_PATH} into {RAW_FILE} and {FILTERED_FILE}...")
    pending = b""  # bytes of an incomplete trailing frame, carried to the next read
    try:
        while True:
            data = f.read(8000)  # ~0.125s stereo
            if not data:
                # read() returned nothing: keep waiting as before, but sleep
                # briefly so the loop does not spin at 100% CPU.
                time.sleep(0.01)
                continue
            # Only ever process whole frames; keep any remainder for later.
            pending += data
            usable = len(pending) - len(pending) % FRAME_BYTES
            if not usable:
                continue
            chunk, pending = pending[:usable], pending[usable:]
            # --- Write raw ---
            raw_wav.writeframes(chunk)
            # --- Convert to numpy ---
            audio_np = np.frombuffer(chunk, dtype=np.int16).reshape(-1, CHANNELS)
            # --- Filter each channel with persistent state ---
            filtered_channels = [
                bandpass_filter(audio_np[:, ch], ch) for ch in range(CHANNELS)
            ]
            # --- Apply gain after filtering ---
            # Stay in floating point until after the clip so the amplified
            # signal saturates instead of wrapping around in int16.
            boosted = np.stack(filtered_channels, axis=1) * GAIN
            filtered_np = np.clip(boosted, -32768, 32767).astype(np.int16)
            # --- Write filtered ---
            filtered_wav.writeframes(filtered_np.tobytes())
    except KeyboardInterrupt:
        print("\nStopping recording.")
    finally:
        raw_wav.close()
        filtered_wav.close()
        print(f"Saved {RAW_FILE} and {FILTERED_FILE}")
--- a/raw_mono.wav
+++ b/raw_mono.wav
--- a/raw_mono_48k.wav
+++ b/raw_mono_48k.wav
--- a/recorded_audio.wav
+++ b/recorded_audio.wav
--- a/recorded_audio_bandpass.wav
+++ b/recorded_audio_bandpass.wav
--- a/recorded_audio_filtered.wav
+++ b/recorded_audio_filtered.wav
--- a/recorded_audio_noise_reduced.wav
+++ b/recorded_audio_noise_reduced.wav
--- a/BIN
+++ b/BIN
--- a/rnnoise_c_test.c
+++ b/rnnoise_c_test.c
@ -0,0 +1,22 @@
 #include <stdio.h>
 #include <string.h>
 #include <rnnoise.h>
 int main() {
    /* Smoke test: push one all-zero frame through RNNoise and print the
     * value rnnoise_process_frame() returns. Exits with 1 if the denoiser
     * state cannot be created, 0 otherwise. */
    enum { FRAME_LEN = 480 };
    float silence[FRAME_LEN] = {0.0f};
    float denoised[FRAME_LEN] = {0.0f};
    /* NULL selects the default model. */
    DenoiseState *state = rnnoise_create(NULL);
    if (state == NULL) {
        fprintf(stderr, "rnnoise_create failed\n");
        return 1;
    }
    const float vad = rnnoise_process_frame(state, denoised, silence);
    printf("VAD: %f\n", vad);
    rnnoise_destroy(state);
    return 0;
 }
--- a/rntest.py
+++ b/rntest.py
@ -0,0 +1,110 @@
 import wave
 import numpy as np
 import ctypes
 from ctypes import c_void_p, c_float, POINTER
 from ctypes.util import find_library
 from scipy.signal import butter, lfilter
 # Path the script opens for reading the incoming PCM byte stream.
 FIFO_PATH = "/tmp/esp32_audio"
 # Output WAV receiving the upsampled, high-pass-filtered mono signal.
 RAW_FILE = "raw_mono_48k.wav"
 # Output WAV receiving the RNNoise output.
 DENOISED_FILE = "denoised_mono_48k.wav"
 # Input sample rate; not referenced elsewhere in the visible script.
 IN_SR = 16000
 # Rate written into both WAV headers and used for the high-pass design.
 TARGET_SR = 48000
 # Interleaved channels in the input stream (averaged down to mono).
 CHANNELS_IN = 2
 # Bytes per input sample; the stream is decoded as int16.
 BYTES_PER_SAMPLE = 2
 FRAME_SIZE = 480   # RNNoise frame size at 48kHz
 IN_FRAME_16K = 160 # 160 samples @16kHz → upsample ×3 → 480 @48kHz
 # --- High-pass filter design ---
 def highpass_filter(data, cutoff=100, fs=TARGET_SR, order=4):
    """Apply a Butterworth high-pass filter to ``data``.

    The filter is designed on every call and run without carried-over state,
    so each call starts from zero initial conditions.

    Args:
        data: 1-D sequence of samples.
        cutoff: Corner frequency, in the same unit as ``fs``.
        fs: Sampling rate; the cutoff is normalised by ``fs / 2``.
        order: Filter order passed to ``scipy.signal.butter``.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    nyquist = fs / 2.0
    numerator, denominator = butter(
        order, cutoff / nyquist, btype='high', analog=False
    )
    return lfilter(numerator, denominator, data)
 # --- Simple linear upsample 16k → 48k (factor 3) ---
 def upsample3(x):
    """Upsample ``x`` by a factor of three using linear interpolation.

    Each input sample is followed by two interpolated points placed one third
    and two thirds of the way towards the next sample. The final sample has no
    successor within ``x``, so it is simply held for its two extra points.

    Args:
        x: 1-D sequence of samples (converted to float32 before interpolating,
            which also prevents overflow for integer input).

    Returns:
        A float32 array of length ``3 * len(x)``; empty when ``x`` is empty.
    """
    x = np.asarray(x, dtype=np.float32)
    out = np.empty(len(x) * 3, dtype=np.float32)
    if len(x) == 0:
        # Nothing to interpolate; x[-1] below would raise on empty input.
        return out
    following = np.append(x[1:], x[-1])
    out[0::3] = x
    out[1::3] = (2.0 * x + following) / 3.0
    # Two thirds of the way to the next sample: weight it 2:1, not the midpoint.
    out[2::3] = (x + 2.0 * following) / 3.0
    return out
 # --- Load RNNoise ---
 # Locate and load the RNNoise shared library.
 libname = find_library("rnnoise")
 if not libname:
    raise RuntimeError("librnnoise not found. Run sudo ldconfig after install.")
 rn = ctypes.CDLL(libname)
 # Declare the C signatures so ctypes marshals pointers and floats correctly.
 rn.rnnoise_create.argtypes = [c_void_p]   # takes RNNModel* (NULL for default)
 rn.rnnoise_create.restype = c_void_p
 rn.rnnoise_destroy.argtypes = [c_void_p]
 rn.rnnoise_destroy.restype = None  # declared void in rnnoise.h
 rn.rnnoise_process_frame.argtypes = [c_void_p,
                                     POINTER(c_float),
                                     POINTER(c_float)]
 rn.rnnoise_process_frame.restype = c_float
 st = rn.rnnoise_create(None)  # NULL = default model
 # A c_void_p restype yields None for a NULL pointer; fail here rather than
 # handing a NULL state to rnnoise_process_frame later.
 if not st:
    raise RuntimeError("rnnoise_create failed")
 # --- Configure WAV writers ---
 import time  # script-local import: only used to back off while the FIFO has no data
 raw_wav = wave.open(RAW_FILE, "wb")
 raw_wav.setnchannels(1)
 raw_wav.setsampwidth(2)
 raw_wav.setframerate(TARGET_SR)
 den_wav = wave.open(DENOISED_FILE, "wb")
 den_wav.setnchannels(1)
 den_wav.setsampwidth(2)
 den_wav.setframerate(TARGET_SR)
 buf = np.empty((0,), dtype=np.int16)
 # Design the 100 Hz, 4th-order high-pass once and keep its state between
 # frames. Restarting the IIR filter from zero on every 10 ms frame injects a
 # transient at each frame boundary.
 hp_b, hp_a = butter(4, 100 / (0.5 * TARGET_SR), btype="high", analog=False)
 hp_zi = np.zeros(max(len(hp_a), len(hp_b)) - 1)
 # Size in bytes of one interleaved input frame (all channels).
 IN_FRAME_BYTES = BYTES_PER_SAMPLE * CHANNELS_IN
 pending = b""  # bytes of an incomplete trailing frame, carried to the next read
 print(f"Recording {FIFO_PATH} → {RAW_FILE}, {DENOISED_FILE}")
 try:
    with open(FIFO_PATH, "rb") as f:
        while True:
            data = f.read(IN_FRAME_16K * IN_FRAME_BYTES)
            if not data:
                # read() returned nothing: keep waiting as before, but sleep
                # briefly so the loop does not spin at 100% CPU.
                time.sleep(0.01)
                continue
            # Only ever decode whole frames; keep any remainder for later.
            pending += data
            usable = len(pending) - len(pending) % IN_FRAME_BYTES
            if not usable:
                continue
            chunk, pending = pending[:usable], pending[usable:]
            # Downmix stereo → mono @16k
            stereo = np.frombuffer(chunk, dtype=np.int16).reshape(-1, CHANNELS_IN)
            mono16 = stereo.mean(axis=1).astype(np.int16)
            buf = np.concatenate([buf, mono16])
            # Process when we have multiples of 160 samples
            while len(buf) >= IN_FRAME_16K:
                frame16 = buf[:IN_FRAME_16K].astype(np.float32) / 32768.0
                buf = buf[IN_FRAME_16K:]
                # Upsample to 48kHz (480 samples)
                frame48 = upsample3(frame16)
                # --- Apply high-pass filter, continuing from the previous frame ---
                frame48, hp_zi = lfilter(hp_b, hp_a, frame48, zi=hp_zi)
                # --- Write raw upsampled mono (with HPF) ---
                raw_wav.writeframes(
                    np.clip(frame48 * 32767.0, -32768, 32767).astype(np.int16).tobytes()
                )
                # --- Denoise ---
                # RNNoise's reference demo passes samples as floats in the
                # 16-bit PCM range, not [-1, 1]; scale up before the call.
                pcm48 = np.ascontiguousarray(frame48 * 32767.0, dtype=np.float32)
                out48 = np.zeros(FRAME_SIZE, dtype=np.float32)
                rn.rnnoise_process_frame(
                    st,
                    out48.ctypes.data_as(POINTER(c_float)),
                    pcm48.ctypes.data_as(POINTER(c_float))
                )
                # The output is already in PCM scale; just saturate and cast.
                den_wav.writeframes(
                    np.clip(out48, -32768, 32767).astype(np.int16).tobytes()
                )
 except KeyboardInterrupt:
    pass
 finally:
    raw_wav.close()
    den_wav.close()
    rn.rnnoise_destroy(st)
    print(f"Saved {RAW_FILE} and {DENOISED_FILE}")