filter_test_simple.py works well and boosts gain; filter_test.py applies excellent noise reduction, but not in real time

master
jake 2025-11-17 02:57:48 +00:00
parent 575754a294
commit c15b081b53
14 changed files with 309 additions and 0 deletions

Binary file not shown.

BIN
denoised_audio.wav Normal file

Binary file not shown.

BIN
denoised_mono_48k.wav Normal file

Binary file not shown.

101
filter_test.py Normal file
View File

@ -0,0 +1,101 @@
import wave
import numpy as np
import soundfile as sf
import noisereduce as nr
from scipy.signal import butter, lfilter
# Audio format shared by every FIFO read and by the WAV headers below.
SAMPLE_RATE = 16000
CHANNELS = 2
BYTES_PER_SAMPLE = 2  # samples are parsed as int16

# Path the incoming audio is read from.
FIFO_PATH = "/tmp/esp32_audio"

# Output files: the untouched capture, the band-passed capture, and the
# noise-reduced result written once recording has stopped.
RAW_FILE = "recorded_audio.wav"
BANDPASS_FILE = "recorded_audio_bandpass.wav"
NOISE_REDUCED_FILE = "recorded_audio_noise_reduced.wav"
# --- DSP helper: gentler band-pass filter ---
def butter_bandpass(lowcut, highcut, fs, order=2):
    """Design a digital Butterworth band-pass filter.

    Args:
        lowcut: Lower band edge, in the same units as ``fs``.
        highcut: Upper band edge, in the same units as ``fs``.
        fs: Sampling rate.
        order: Order handed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` coefficient pair returned by ``butter``.
    """
    nyquist = 0.5 * fs
    # butter() is given the band edges as fractions of the Nyquist frequency.
    band_edges = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = butter(order, band_edges, btype="band")
    return numerator, denominator
# Design the filter once and keep one lfilter state vector per channel, so
# successive chunks are filtered as one continuous signal.
b, a = butter_bandpass(80, 7000, SAMPLE_RATE, order=2)
_state_len = max(len(a), len(b)) - 1
zi = [np.zeros(_state_len) for _ in range(CHANNELS)]


def bandpass_filter(data, channel):
    """Band-pass one chunk of a single channel.

    Args:
        data: Samples of the chunk for this channel.
        channel: Index into ``zi`` selecting that channel's filter state.

    Returns:
        The filtered samples. ``zi[channel]`` is replaced with the final
        filter state so the next call continues where this one stopped.
    """
    output, final_state = lfilter(b, a, data, zi=zi[channel])
    zi[channel] = final_state
    return output
# --- Step 1: Capture ~2s of noise (runtime only) ---
print("Sampling background noise...")
frame_bytes = BYTES_PER_SAMPLE * CHANNELS
with open(FIFO_PATH, "rb") as f:
    noise_data = f.read(SAMPLE_RATE * frame_bytes * 2)  # 2 seconds
# read() returns fewer bytes than requested when the writer closes early, and
# that tail can end in the middle of a frame. Drop the incomplete frame so
# the int16 parse and the reshape below cannot fail.
noise_data = noise_data[: len(noise_data) - len(noise_data) % frame_bytes]
if not noise_data:
    print("Warning: captured no complete noise frames from the FIFO.")
# Rows are frames, columns are channels. The mono/float conversion of this
# sample is done in the post-processing step, where it is actually used.
noise_np = np.frombuffer(noise_data, dtype=np.int16).reshape(-1, CHANNELS)
# --- Configure WAV writers ---
def _open_wav_writer(path):
    """Open ``path`` for writing with the capture's channel count, width and rate."""
    writer = wave.open(path, "wb")
    writer.setnchannels(CHANNELS)
    writer.setsampwidth(BYTES_PER_SAMPLE)
    writer.setframerate(SAMPLE_RATE)
    return writer


raw_wav = _open_wav_writer(RAW_FILE)
bandpass_wav = _open_wav_writer(BANDPASS_FILE)
# --- Step 2: Record raw + band-pass in parallel ---
import time  # local import: only used for the idle back-off below

with open(FIFO_PATH, "rb") as f:
    print(f"Recording from {FIFO_PATH} into {RAW_FILE} and {BANDPASS_FILE}...")
    bytes_per_frame = BYTES_PER_SAMPLE * CHANNELS
    pending = b""  # bytes of a frame that was split across two reads
    try:
        while True:
            data = f.read(8192)  # aligned block size
            if not data:
                # No writer / no data yet: back off briefly instead of
                # spinning at full CPU, then keep waiting as before.
                time.sleep(0.01)
                continue
            # A read may return a byte count that is not a whole number of
            # frames; carry the remainder over so both output files stay
            # frame-aligned and the reshape below cannot fail.
            data = pending + data
            usable = len(data) - len(data) % bytes_per_frame
            data, pending = data[:usable], data[usable:]
            if not data:
                continue
            # --- Write raw ---
            raw_wav.writeframes(data)
            # --- Convert to numpy: rows are frames, columns are channels ---
            audio_np = np.frombuffer(data, dtype=np.int16).reshape(-1, CHANNELS)
            # --- Band-pass filter each channel ---
            filtered_np = np.stack(
                [bandpass_filter(audio_np[:, ch], ch) for ch in range(CHANNELS)],
                axis=1,
            )
            # The filter output is floating point and can overshoot the int16
            # range; clip first so the cast saturates instead of wrapping.
            filtered_np = np.clip(filtered_np, -32768, 32767).astype(np.int16)
            # --- Write band-pass filtered ---
            bandpass_wav.writeframes(filtered_np.tobytes())
    except KeyboardInterrupt:
        print("\nStopping recording.")
    finally:
        raw_wav.close()
        bandpass_wav.close()
        print(f"Saved {RAW_FILE} and {BANDPASS_FILE}")
# --- Step 3: Post-process noise reduction ---
print("Running noise reduction on raw file...")
# Request float32 so the recording is in [-1, 1].
raw_audio, sr = sf.read(RAW_FILE, dtype="float32")

# Collapse recording and noise sample to mono; the int16 noise sample is
# divided by 32768 so both signals share the same scale.
raw_mono = raw_audio.mean(axis=1)
noise_mono = noise_np.astype(np.float32).mean(axis=1) / 32768.0

noise_rms = np.sqrt(np.mean(noise_mono**2))
raw_rms = np.sqrt(np.mean(raw_mono**2))
print("Noise RMS:", noise_rms)
print("Raw RMS:", raw_rms)

# Noise reduction using the captured noise sample.
filtered_mono = nr.reduce_noise(
    y=raw_mono,
    sr=sr,
    y_noise=noise_mono,
    prop_decrease=0.7,
)

# Duplicate the mono result into two identical channels.
filtered_stereo = np.column_stack([filtered_mono, filtered_mono])
sf.write(NOISE_REDUCED_FILE, filtered_stereo, sr)
print(f"Saved {NOISE_REDUCED_FILE}")

76
filter_test_simple.py Normal file
View File

@ -0,0 +1,76 @@
import wave
import numpy as np
from scipy.signal import butter, lfilter
# Audio format shared by every FIFO read and by the WAV headers below.
SAMPLE_RATE = 16000
CHANNELS = 2
BYTES_PER_SAMPLE = 2  # samples are parsed as int16

# Path the incoming audio is read from.
FIFO_PATH = "/tmp/esp32_audio"

# Output files: the untouched capture and the filtered, gain-boosted capture.
RAW_FILE = "recorded_audio.wav"
FILTERED_FILE = "recorded_audio_filtered.wav"
# --- DSP helper: smoother band-pass filter ---
def butter_bandpass(lowcut, highcut, fs, order=4):
    """Return ``(b, a)`` coefficients of a digital Butterworth band-pass.

    Args:
        lowcut: Lower band edge, in the same units as ``fs``.
        highcut: Upper band edge, in the same units as ``fs``.
        fs: Sampling rate.
        order: Order handed to ``scipy.signal.butter``.
    """
    half_rate = 0.5 * fs
    # Band edges are expressed relative to the Nyquist frequency.
    normalized = [edge / half_rate for edge in (lowcut, highcut)]
    coeffs = butter(order, normalized, btype="band")
    return coeffs[0], coeffs[1]
# Keep filter state across chunks (avoids clicks)
b, a = butter_bandpass(120, 6000, SAMPLE_RATE, order=2)
# One lfilter state vector per channel, all starting at zero.
_n_state = max(len(a), len(b)) - 1
zi = [np.zeros(_n_state) for _ in range(CHANNELS)]


def bandpass_filter(data, channel):
    """Filter one chunk of ``channel`` and remember the filter state.

    Args:
        data: Samples of the chunk for this channel.
        channel: Index into ``zi`` selecting that channel's state.

    Returns:
        The filtered samples; ``zi[channel]`` is updated to the final state
        so the next chunk continues from it.
    """
    result = lfilter(b, a, data, zi=zi[channel])
    zi[channel] = result[1]
    return result[0]
# --- Configure WAV writers ---
def _new_wav(path):
    """Create a WAV writer at ``path`` using the capture's format constants."""
    handle = wave.open(path, "wb")
    handle.setnchannels(CHANNELS)
    handle.setsampwidth(BYTES_PER_SAMPLE)
    handle.setframerate(SAMPLE_RATE)
    return handle


raw_wav = _new_wav(RAW_FILE)
filtered_wav = _new_wav(FILTERED_FILE)
import time  # local import: only used for the idle back-off below

# Linear gain applied after filtering: 6x amplitude, about +15.6 dB.
GAIN = 6.0

with open(FIFO_PATH, "rb") as f:
    print(f"Recording from {FIFO_PATH} into {RAW_FILE} and {FILTERED_FILE}...")
    bytes_per_frame = BYTES_PER_SAMPLE * CHANNELS
    pending = b""  # bytes of a frame that was split across two reads
    try:
        while True:
            data = f.read(8000)  # ~0.125s stereo
            if not data:
                # No writer / no data yet: back off briefly instead of
                # spinning at full CPU, then keep waiting as before.
                time.sleep(0.01)
                continue
            # A read may return a byte count that is not a whole number of
            # frames; carry the remainder over so both output files stay
            # frame-aligned and the reshape below cannot fail.
            data = pending + data
            usable = len(data) - len(data) % bytes_per_frame
            data, pending = data[:usable], data[usable:]
            if not data:
                continue
            # --- Write raw ---
            raw_wav.writeframes(data)
            # --- Convert to numpy: rows are frames, columns are channels ---
            audio_np = np.frombuffer(data, dtype=np.int16).reshape(-1, CHANNELS)
            # --- Filter each channel with persistent state ---
            filtered_np = np.stack(
                [bandpass_filter(audio_np[:, ch], ch) for ch in range(CHANNELS)],
                axis=1,
            )
            # --- Apply gain after filtering ---
            # Scale in floating point and clip to the int16 range before the
            # cast so loud passages saturate instead of wrapping around.
            filtered_np = np.clip(filtered_np * GAIN, -32768, 32767).astype(np.int16)
            # --- Write filtered ---
            filtered_wav.writeframes(filtered_np.tobytes())
    except KeyboardInterrupt:
        print("\nStopping recording.")
    finally:
        raw_wav.close()
        filtered_wav.close()
        print(f"Saved {RAW_FILE} and {FILTERED_FILE}")

BIN
raw_mono.wav Normal file

Binary file not shown.

BIN
raw_mono_48k.wav Normal file

Binary file not shown.

Binary file not shown.

BIN
recorded_audio_bandpass.wav Normal file

Binary file not shown.

BIN
recorded_audio_filtered.wav Normal file

Binary file not shown.

Binary file not shown.

BIN
rnnoise_c_test Executable file

Binary file not shown.

22
rnnoise_c_test.c Normal file
View File

@ -0,0 +1,22 @@
#include <stdio.h>
#include <string.h>
#include <rnnoise.h>
int main() {
    /* Smoke test: push one all-zero frame through RNNoise and print the
     * value returned by rnnoise_process_frame. Returns 1 if the denoiser
     * state cannot be created, 0 otherwise. */
    enum { FRAME_LEN = 480 };
    float in[FRAME_LEN] = {0};
    float out[FRAME_LEN] = {0};
    float vad;
    DenoiseState *st;

    st = rnnoise_create(NULL); /* NULL selects the default model */
    if (st == NULL) {
        fprintf(stderr, "rnnoise_create failed\n");
        return 1;
    }

    vad = rnnoise_process_frame(st, out, in);
    printf("VAD: %f\n", vad);

    rnnoise_destroy(st);
    return 0;
}

110
rntest.py Normal file
View File

@ -0,0 +1,110 @@
import wave
import numpy as np
import ctypes
from ctypes import c_void_p, c_float, POINTER
from ctypes.util import find_library
from scipy.signal import butter, lfilter
# Format of the stream read from the FIFO.
IN_SR = 16000
CHANNELS_IN = 2
BYTES_PER_SAMPLE = 2  # samples are parsed as int16

# Processing/output rate and frame geometry.
TARGET_SR = 48000
FRAME_SIZE = 480  # RNNoise frame size at 48kHz
IN_FRAME_16K = FRAME_SIZE * IN_SR // TARGET_SR  # 160 samples @16kHz, x3 upsampled to 480 @48kHz

# Input path and the two mono output files.
FIFO_PATH = "/tmp/esp32_audio"
RAW_FILE = "raw_mono_48k.wav"
DENOISED_FILE = "denoised_mono_48k.wav"
# --- High-pass filter design ---
def highpass_filter(data, cutoff=100, fs=TARGET_SR, order=4):
    """Run ``data`` through a Butterworth high-pass filter and return the result.

    The filter is designed on every call and run without any carried-over
    state, so each call treats ``data`` as an independent signal.

    Args:
        data: Samples to filter.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling rate of ``data``.
        order: Order handed to ``scipy.signal.butter``.
    """
    # butter() is given the cutoff as a fraction of the Nyquist frequency.
    normalized_cutoff = cutoff / (0.5 * fs)
    coeffs = butter(order, normalized_cutoff, btype='high', analog=False)
    return lfilter(coeffs[0], coeffs[1], data)
# --- Simple linear upsample 16k → 48k (factor 3) ---
def upsample3(x):
    """Upsample ``x`` by a factor of 3 using linear interpolation.

    Each input sample is followed by two interpolated samples placed 1/3
    and 2/3 of the way towards the next input sample. The last input sample
    has no successor, so it is simply repeated.

    Args:
        x: One-dimensional sequence of samples.

    Returns:
        A float32 array of length ``3 * len(x)`` (empty for empty input).
    """
    x = np.asarray(x, dtype=np.float32)
    out = np.empty(len(x) * 3, dtype=np.float32)
    if len(x) == 0:
        # x[-1] below would raise on empty input.
        return out
    nxt = np.append(x[1:], x[-1])  # successor of each sample; last one held
    out[0::3] = x
    out[1::3] = (2.0 * x + nxt) / 3.0  # 1/3 of the way: weights 2/3 and 1/3
    out[2::3] = (x + 2.0 * nxt) / 3.0  # 2/3 of the way: weights 1/3 and 2/3
    return out
# --- Load RNNoise ---
libname = find_library("rnnoise")
if not libname:
    raise RuntimeError("librnnoise not found. Run sudo ldconfig after install.")
rn = ctypes.CDLL(libname)

# Declare the C signatures explicitly. Without a pointer restype, ctypes
# would treat the returned handle as a C int.
rn.rnnoise_create.argtypes = [c_void_p]  # takes RNNModel* (NULL for default)
rn.rnnoise_create.restype = c_void_p
rn.rnnoise_destroy.argtypes = [c_void_p]
rn.rnnoise_destroy.restype = None  # void function
rn.rnnoise_process_frame.argtypes = [
    c_void_p,
    POINTER(c_float),
    POINTER(c_float),
]
rn.rnnoise_process_frame.restype = c_float

st = rn.rnnoise_create(None)  # NULL = default model
if not st:
    # A NULL handle comes back as None; fail here rather than passing it
    # on to rnnoise_process_frame later.
    raise RuntimeError("rnnoise_create failed")
# --- Configure WAV writers ---
def _open_mono_writer(path):
    """Open ``path`` as a mono, 2-byte-per-sample WAV writer at TARGET_SR."""
    writer = wave.open(path, "wb")
    writer.setnchannels(1)
    writer.setsampwidth(2)
    writer.setframerate(TARGET_SR)
    return writer


raw_wav = _open_mono_writer(RAW_FILE)
den_wav = _open_mono_writer(DENOISED_FILE)
import time  # local import: only used for the idle back-off below

# Design the 100 Hz high-pass once and carry its state from frame to frame.
# Filtering every 10 ms frame from a zero state would restart the filter
# transient at each frame boundary.
hp_b, hp_a = butter(4, 100 / (0.5 * TARGET_SR), btype="high", analog=False)
hp_zi = np.zeros(max(len(hp_a), len(hp_b)) - 1)

in_frame_bytes = BYTES_PER_SAMPLE * CHANNELS_IN
pending = b""  # bytes of a stereo frame that was split across two reads
buf = np.empty((0,), dtype=np.int16)
print(f"Recording {FIFO_PATH} -> {RAW_FILE}, {DENOISED_FILE}")
try:
    with open(FIFO_PATH, "rb") as f:
        while True:
            data = f.read(IN_FRAME_16K * in_frame_bytes)
            if not data:
                # No writer / no data yet: back off briefly instead of
                # spinning at full CPU, then keep waiting as before.
                time.sleep(0.005)
                continue
            # Keep reads aligned to whole stereo frames so the int16 parse
            # and the reshape below cannot fail on a short read.
            data = pending + data
            usable = len(data) - len(data) % in_frame_bytes
            data, pending = data[:usable], data[usable:]
            if not data:
                continue
            # Downmix stereo → mono @16k
            stereo = np.frombuffer(data, dtype=np.int16).reshape(-1, CHANNELS_IN)
            mono16 = stereo.mean(axis=1).astype(np.int16)
            buf = np.concatenate([buf, mono16])
            # Process 160-sample frames, keeping one sample of lookahead so
            # the interpolation at the end of each frame runs towards the
            # real next sample instead of holding the last value.
            while len(buf) > IN_FRAME_16K:
                # Samples stay in int16 scale as floats: RNNoise works on
                # 16-bit PCM range values, not on [-1, 1].
                chunk16 = buf[:IN_FRAME_16K + 1].astype(np.float32)
                buf = buf[IN_FRAME_16K:]
                # Upsample to 48kHz and keep the 480 samples of this frame
                frame48 = upsample3(chunk16)[:FRAME_SIZE]
                # --- Apply high-pass filter (state carried across frames) ---
                frame48, hp_zi = lfilter(hp_b, hp_a, frame48, zi=hp_zi)
                frame48 = np.ascontiguousarray(frame48, dtype=np.float32)
                # --- Write raw upsampled mono (with HPF) ---
                raw_wav.writeframes(
                    np.clip(frame48, -32768, 32767).astype(np.int16).tobytes()
                )
                # --- Denoise ---
                out48 = np.zeros(FRAME_SIZE, dtype=np.float32)
                rn.rnnoise_process_frame(
                    st,
                    out48.ctypes.data_as(POINTER(c_float)),
                    frame48.ctypes.data_as(POINTER(c_float)),
                )
                den_wav.writeframes(
                    np.clip(out48, -32768, 32767).astype(np.int16).tobytes()
                )
except KeyboardInterrupt:
    pass
finally:
    raw_wav.close()
    den_wav.close()
    rn.rnnoise_destroy(st)
print(f"Saved {RAW_FILE} and {DENOISED_FILE}")