filter_test_simple.py works well and boosts gain; filter_test.py applies excellent noise reduction, but not in real time
parent
575754a294
commit
c15b081b53
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,101 @@
|
||||||
|
import wave
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
|
import noisereduce as nr
|
||||||
|
from scipy.signal import butter, lfilter
|
||||||
|
|
||||||
|
# --- Input stream format (interleaved PCM read from the FIFO) ---
SAMPLE_RATE = 16000
CHANNELS = 2
BYTES_PER_SAMPLE = 2

# --- Paths: FIFO that is read, and the WAV files that are written ---
FIFO_PATH = "/tmp/esp32_audio"
RAW_FILE = "recorded_audio.wav"
BANDPASS_FILE = "recorded_audio_bandpass.wav"
NOISE_REDUCED_FILE = "recorded_audio_noise_reduced.wav"
|
||||||
|
|
||||||
|
# --- DSP helper: gentler band-pass filter ---
|
||||||
|
def butter_bandpass(lowcut, highcut, fs, order=2):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower band edge, in the same frequency units as ``fs``.
        highcut: Upper band edge, in the same frequency units as ``fs``.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Order handed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` coefficient pair returned by ``scipy.signal.butter``.
    """
    nyquist = 0.5 * fs
    # butter() takes the band edges as fractions of the Nyquist frequency.
    normalized_band = [edge / nyquist for edge in (lowcut, highcut)]
    return butter(order, normalized_band, btype="band")
|
||||||
|
|
||||||
|
# Band-pass coefficients plus one lfilter state vector per channel, so that
# consecutive chunks are filtered as a single continuous stream.
b, a = butter_bandpass(80, 7000, SAMPLE_RATE, order=2)
_STATE_LEN = max(len(a), len(b)) - 1
zi = [np.zeros(_STATE_LEN) for _ in range(CHANNELS)]


def bandpass_filter(data, channel):
    """Band-pass one chunk of a single channel, carrying state between calls.

    Args:
        data: 1-D sequence of samples belonging to ``channel``.
        channel: Index into the module-level ``zi`` list selecting which
            channel's filter state to use and update.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    filtered, next_state = lfilter(b, a, data, zi=zi[channel])
    # Store the final filter state so the next chunk continues from here.
    zi[channel] = next_state
    return filtered
|
||||||
|
|
||||||
|
# --- Capture noise, record raw + band-passed audio, then post-process ---
import time  # only used to back off while the FIFO has no data

FRAME_BYTES = BYTES_PER_SAMPLE * CHANNELS  # bytes per interleaved frame
NOISE_SECONDS = 2
READ_BYTES = 8192  # multiple of FRAME_BYTES
IDLE_SLEEP_S = 0.01

# One FIFO handle is used for both the noise sample and the recording, so
# nothing buffered is discarded (and the writer never sees the read end
# close) between the two phases.
with open(FIFO_PATH, "rb") as f:
    # --- Step 1: Capture ~2s of noise (runtime only) ---
    print("Sampling background noise...")
    noise_data = f.read(SAMPLE_RATE * FRAME_BYTES * NOISE_SECONDS)
    # A short read (writer closed early) may end mid-frame; drop the tail so
    # frombuffer/reshape cannot fail.
    noise_data = noise_data[:len(noise_data) - len(noise_data) % FRAME_BYTES]
    noise_np = np.frombuffer(noise_data, dtype=np.int16).reshape(-1, CHANNELS)

    # --- Step 2: Record raw + band-pass in parallel ---
    with wave.open(RAW_FILE, "wb") as raw_wav, \
            wave.open(BANDPASS_FILE, "wb") as bandpass_wav:
        for writer in (raw_wav, bandpass_wav):
            writer.setnchannels(CHANNELS)
            writer.setsampwidth(BYTES_PER_SAMPLE)
            writer.setframerate(SAMPLE_RATE)

        print(f"Recording from {FIFO_PATH} into {RAW_FILE} and {BANDPASS_FILE}...")
        pending = b""  # bytes read but not yet forming a whole frame
        try:
            while True:
                chunk = f.read(READ_BYTES)
                if not chunk:
                    # Empty read: no writer at the moment. Sleep instead of
                    # spinning at 100% CPU while waiting for more data.
                    time.sleep(IDLE_SLEEP_S)
                    continue

                pending += chunk
                usable = len(pending) - len(pending) % FRAME_BYTES
                if not usable:
                    continue
                data, pending = pending[:usable], pending[usable:]

                # --- Write raw ---
                raw_wav.writeframes(data)

                # --- Convert to numpy: one row per frame, one column per channel ---
                audio_np = np.frombuffer(data, dtype=np.int16).reshape(-1, CHANNELS)

                # --- Band-pass filter each channel ---
                filtered_np = np.stack(
                    [bandpass_filter(audio_np[:, ch], ch) for ch in range(CHANNELS)],
                    axis=1,
                )

                # --- Write band-pass filtered ---
                # Clip before casting: filter overshoot beyond the int16 range
                # would otherwise wrap around and produce loud artifacts.
                filtered_i16 = np.clip(filtered_np, -32768, 32767).astype(np.int16)
                bandpass_wav.writeframes(filtered_i16.tobytes())
        except KeyboardInterrupt:
            print("\nStopping recording.")

print(f"Saved {RAW_FILE} and {BANDPASS_FILE}")

# --- Step 3: Post-process noise reduction ---
print("Running noise reduction on raw file...")
raw_audio, sr = sf.read(RAW_FILE, dtype="float32")  # ensure float32 in [-1,1]

if len(raw_audio) == 0 or len(noise_np) == 0:
    # Nothing to denoise (or no noise profile was captured).
    print("No audio captured; skipping noise reduction.")
else:
    # Downmix both to mono
    raw_mono = raw_audio.mean(axis=1)
    noise_mono = noise_np.astype(np.float32).mean(axis=1) / 32768.0  # scale int16 -> [-1,1]

    print("Noise RMS:", np.sqrt(np.mean(noise_mono**2)))
    print("Raw RMS:", np.sqrt(np.mean(raw_mono**2)))

    # Apply noise reduction using the captured noise sample as the profile
    filtered_mono = nr.reduce_noise(
        y=raw_mono,
        sr=sr,
        y_noise=noise_mono,
        prop_decrease=0.7,
    )

    # Mirror back to stereo
    filtered_stereo = np.column_stack((filtered_mono, filtered_mono))
    sf.write(NOISE_REDUCED_FILE, filtered_stereo, sr)
    print(f"Saved {NOISE_REDUCED_FILE}")
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
import wave
|
||||||
|
import numpy as np
|
||||||
|
from scipy.signal import butter, lfilter
|
||||||
|
|
||||||
|
# --- Input stream format (interleaved PCM read from the FIFO) ---
SAMPLE_RATE = 16000
CHANNELS = 2
BYTES_PER_SAMPLE = 2

# --- Paths: FIFO that is read, and the WAV files that are written ---
FIFO_PATH = "/tmp/esp32_audio"
RAW_FILE = "recorded_audio.wav"
FILTERED_FILE = "recorded_audio_filtered.wav"
|
||||||
|
|
||||||
|
# --- DSP helper: smoother band-pass filter ---
|
||||||
|
def butter_bandpass(lowcut, highcut, fs, order=4):
    """Return Butterworth band-pass coefficients for the given band.

    Args:
        lowcut: Lower band edge, in the same frequency units as ``fs``.
        highcut: Upper band edge, in the same frequency units as ``fs``.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Order handed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` coefficient pair returned by ``scipy.signal.butter``.
    """
    half_rate = 0.5 * fs
    # Band edges are expressed relative to the Nyquist frequency.
    edges = (lowcut / half_rate, highcut / half_rate)
    numerator, denominator = butter(order, list(edges), btype="band")
    return numerator, denominator
|
||||||
|
|
||||||
|
# Filter coefficients and per-channel lfilter state. Keeping the state
# across chunks avoids clicks at chunk boundaries.
b, a = butter_bandpass(120, 6000, SAMPLE_RATE, order=2)
_STATE_LEN = max(len(a), len(b)) - 1
zi = [np.zeros(_STATE_LEN) for _ in range(CHANNELS)]


def bandpass_filter(data, channel):
    """Filter one chunk of one channel and remember the filter state.

    Args:
        data: 1-D sequence of samples belonging to ``channel``.
        channel: Index into the module-level ``zi`` list selecting which
            channel's filter state to use and update.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    result, final_state = lfilter(b, a, data, zi=zi[channel])
    zi[channel] = final_state  # picked up again by the next call
    return result
|
||||||
|
|
||||||
|
# --- Record raw audio plus a band-passed, amplified copy ---
import time  # only used to back off while the FIFO has no data

FRAME_BYTES = BYTES_PER_SAMPLE * CHANNELS  # bytes per interleaved frame
READ_BYTES = 8000  # ~0.125s stereo
IDLE_SLEEP_S = 0.01
# Linear amplitude factor applied after filtering (6x, about +15.6 dB).
GAIN = 6.0

with open(FIFO_PATH, "rb") as f, \
        wave.open(RAW_FILE, "wb") as raw_wav, \
        wave.open(FILTERED_FILE, "wb") as filtered_wav:
    # --- Configure WAV writers ---
    for writer in (raw_wav, filtered_wav):
        writer.setnchannels(CHANNELS)
        writer.setsampwidth(BYTES_PER_SAMPLE)
        writer.setframerate(SAMPLE_RATE)

    print(f"Recording from {FIFO_PATH} into {RAW_FILE} and {FILTERED_FILE}...")
    pending = b""  # bytes read but not yet forming a whole frame
    try:
        while True:
            chunk = f.read(READ_BYTES)
            if not chunk:
                # Empty read: no writer at the moment. Sleep instead of
                # spinning at 100% CPU while waiting for more data.
                time.sleep(IDLE_SLEEP_S)
                continue

            # A short read may end mid-frame; only process whole frames and
            # keep the remainder for the next iteration.
            pending += chunk
            usable = len(pending) - len(pending) % FRAME_BYTES
            if not usable:
                continue
            data, pending = pending[:usable], pending[usable:]

            # --- Write raw ---
            raw_wav.writeframes(data)

            # --- Convert to numpy: one row per frame, one column per channel ---
            audio_np = np.frombuffer(data, dtype=np.int16).reshape(-1, CHANNELS)

            # --- Filter each channel with persistent state ---
            filtered_np = np.stack(
                [bandpass_filter(audio_np[:, ch], ch) for ch in range(CHANNELS)],
                axis=1,
            )

            # --- Apply gain after filtering ---
            # Scale and clip to int16 range so loud passages saturate
            # instead of wrapping around.
            filtered_np = np.clip(filtered_np * GAIN, -32768, 32767).astype(np.int16)

            # --- Write filtered ---
            filtered_wav.writeframes(filtered_np.tobytes())
    except KeyboardInterrupt:
        print("\nStopping recording.")

print(f"Saved {RAW_FILE} and {FILTERED_FILE}")
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,22 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <rnnoise.h>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
DenoiseState *st = rnnoise_create(NULL); // pass NULL for default model
|
||||||
|
if (!st) {
|
||||||
|
fprintf(stderr, "rnnoise_create failed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
float in[480];
|
||||||
|
float out[480];
|
||||||
|
memset(in, 0, sizeof(in));
|
||||||
|
memset(out, 0, sizeof(out));
|
||||||
|
|
||||||
|
float vad = rnnoise_process_frame(st, out, in);
|
||||||
|
printf("VAD: %f\n", vad);
|
||||||
|
|
||||||
|
rnnoise_destroy(st);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
import wave
|
||||||
|
import numpy as np
|
||||||
|
import ctypes
|
||||||
|
from ctypes import c_void_p, c_float, POINTER
|
||||||
|
from ctypes.util import find_library
|
||||||
|
from scipy.signal import butter, lfilter
|
||||||
|
|
||||||
|
# --- Paths: FIFO that is read, and the WAV files that are written ---
FIFO_PATH = "/tmp/esp32_audio"
RAW_FILE = "raw_mono_48k.wav"
DENOISED_FILE = "denoised_mono_48k.wav"

# --- Input stream format ---
IN_SR = 16000
CHANNELS_IN = 2
BYTES_PER_SAMPLE = 2

# --- Processing format ---
TARGET_SR = 48000
FRAME_SIZE = 480     # RNNoise frame size at 48kHz
IN_FRAME_16K = 160   # 160 samples @16kHz → upsample ×3 → 480 @48kHz
|
||||||
|
|
||||||
|
# --- High-pass filter design ---
|
||||||
|
def highpass_filter(data, cutoff=100, fs=TARGET_SR, order=4):
    """Apply a Butterworth high-pass filter to ``data``.

    The filter is designed on every call and ``lfilter`` is run without an
    initial state, so no filter state is carried from one call to the next.

    Args:
        data: Samples to filter.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling rate of ``data``.
        order: Order handed to ``scipy.signal.butter``.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    nyquist = 0.5 * fs
    numerator, denominator = butter(
        order, cutoff / nyquist, btype='high', analog=False
    )
    return lfilter(numerator, denominator, data)
|
||||||
|
|
||||||
|
# --- Simple linear upsample 16k → 48k (factor 3) ---
|
||||||
|
def upsample3(x):
    """Upsample a 1-D signal by a factor of 3 using linear interpolation.

    Each input sample is followed by two interpolated points placed 1/3 and
    2/3 of the way towards the next sample. The last sample has no
    successor, so it is simply held for its two trailing points.

    Args:
        x: 1-D sequence of samples (converted to float32).

    Returns:
        A float32 array with ``3 * len(x)`` samples; empty for empty input.
    """
    x = np.asarray(x, dtype=np.float32)
    out = np.empty(len(x) * 3, dtype=np.float32)
    if len(x) == 0:
        # x[-1] below would raise on empty input.
        return out

    following = np.append(x[1:], x[-1])  # next sample; last one is repeated
    out[0::3] = x
    out[1::3] = (2.0 * x + following) / 3.0   # 1/3 of the way to the next sample
    # 2/3 of the way to the next sample. The midpoint (x + next) / 2 would
    # space the interpolated points unevenly and distort the waveform.
    out[2::3] = (x + 2.0 * following) / 3.0
    return out
|
||||||
|
|
||||||
|
# --- Load RNNoise ---
libname = find_library("rnnoise")
if not libname:
    raise RuntimeError("librnnoise not found. Run sudo ldconfig after install.")
rn = ctypes.CDLL(libname)

# Declare the C signatures so ctypes passes and returns pointers and floats
# correctly instead of assuming int.
rn.rnnoise_create.argtypes = [c_void_p]  # takes RNNModel* (NULL for default)
rn.rnnoise_create.restype = c_void_p
rn.rnnoise_destroy.argtypes = [c_void_p]
rn.rnnoise_destroy.restype = None  # void function: no return value to read
rn.rnnoise_process_frame.argtypes = [
    c_void_p,           # denoiser state
    POINTER(c_float),   # output frame
    POINTER(c_float),   # input frame
]
rn.rnnoise_process_frame.restype = c_float

st = rn.rnnoise_create(None)  # NULL = default model
if not st:
    # With restype c_void_p a NULL pointer comes back as None; using it in
    # rnnoise_process_frame would crash the interpreter.
    raise RuntimeError("rnnoise_create failed")
|
||||||
|
|
||||||
|
# --- Stream FIFO audio through a high-pass filter and RNNoise ---
import time  # only used to back off while the FIFO has no data

IN_FRAME_BYTES = BYTES_PER_SAMPLE * CHANNELS_IN   # bytes per stereo frame
READ_BYTES = IN_FRAME_16K * IN_FRAME_BYTES
IDLE_SLEEP_S = 0.01

# Design the 100 Hz high-pass once and carry its state from frame to frame.
# Filtering every 480-sample frame from a zero state restarts the filter
# transient at each frame boundary, which is audible as clicks.
hp_b, hp_a = butter(4, 100 / (0.5 * TARGET_SR), btype="high", analog=False)
hp_state = np.zeros(max(len(hp_a), len(hp_b)) - 1)


def _pcm16_bytes(samples):
    """Clip 16-bit-scaled float samples to int16 and return their raw bytes."""
    return np.clip(samples, -32768, 32767).astype(np.int16).tobytes()


pending = b""                          # bytes not yet forming a stereo frame
buf = np.empty((0,), dtype=np.int16)   # mono samples not yet processed

print(f"Recording {FIFO_PATH} → {RAW_FILE}, {DENOISED_FILE}")
try:
    with wave.open(RAW_FILE, "wb") as raw_wav, \
            wave.open(DENOISED_FILE, "wb") as den_wav, \
            open(FIFO_PATH, "rb") as f:
        # --- Configure WAV writers ---
        for writer in (raw_wav, den_wav):
            writer.setnchannels(1)
            writer.setsampwidth(2)
            writer.setframerate(TARGET_SR)

        while True:
            chunk = f.read(READ_BYTES)
            if not chunk:
                # Empty read: no writer at the moment. Sleep instead of
                # spinning at 100% CPU while waiting for more data.
                time.sleep(IDLE_SLEEP_S)
                continue

            # A short read may end mid-frame; only convert whole frames.
            pending += chunk
            usable = len(pending) - len(pending) % IN_FRAME_BYTES
            if not usable:
                continue

            # Downmix stereo → mono @16k
            stereo = np.frombuffer(
                pending[:usable], dtype=np.int16
            ).reshape(-1, CHANNELS_IN)
            pending = pending[usable:]
            mono16 = stereo.mean(axis=1).astype(np.int16)
            buf = np.concatenate([buf, mono16])

            # Process when we have multiples of 160 samples
            while len(buf) >= IN_FRAME_16K:
                # Keep the samples on the 16-bit PCM scale: RNNoise's
                # reference demo feeds it floats converted directly from
                # int16, not values normalized to [-1, 1].
                frame16 = buf[:IN_FRAME_16K].astype(np.float32)
                buf = buf[IN_FRAME_16K:]

                # Upsample to 48kHz (480 samples)
                frame48 = upsample3(frame16)

                # --- Apply high-pass filter (state carried across frames) ---
                frame48, hp_state = lfilter(hp_b, hp_a, frame48, zi=hp_state)

                # ctypes needs a contiguous float32 buffer.
                frame48 = np.ascontiguousarray(frame48, dtype=np.float32)

                # --- Write raw upsampled mono (with HPF) ---
                raw_wav.writeframes(_pcm16_bytes(frame48))

                # --- Denoise ---
                out48 = np.zeros(FRAME_SIZE, dtype=np.float32)
                rn.rnnoise_process_frame(
                    st,
                    out48.ctypes.data_as(POINTER(c_float)),
                    frame48.ctypes.data_as(POINTER(c_float)),
                )
                den_wav.writeframes(_pcm16_bytes(out48))
except KeyboardInterrupt:
    pass
finally:
    # The WAV files are already closed here (the with-block has exited).
    rn.rnnoise_destroy(st)
    print(f"Saved {RAW_FILE} and {DENOISED_FILE}")
|
||||||
Loading…
Reference in New Issue