Skip to content

Instantly share code, notes, and snippets.

@PandaWhoCodes
Created October 29, 2017 11:29
Show Gist options
  • Save PandaWhoCodes/9f3dc05faee761149842e43b56e6ee8c to your computer and use it in GitHub Desktop.
Save PandaWhoCodes/9f3dc05faee761149842e43b56e6ee8c to your computer and use it in GitHub Desktop.
Noise reduction using pyaudio documentation code
"""
Measure the frequencies coming in through the microphone
Patchwork of wire_full.py from pyaudio tests and spectrum.py from Chaco examples
"""
import pyaudio
import numpy as np
import scipy.signal
CHUNK = 1024 * 2
WIDTH = 2
DTYPE = np.int16
MAX_INT = 32768.0
CHANNELS = 1
RATE = 11025 * 1
RECORD_SECONDS = 20
j = np.complex(0, 1)
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(WIDTH),
channels=CHANNELS,
rate=RATE,
input=True,
output=True,
frames_per_buffer=CHUNK)
print("Recording Audio...")
# initialize filter variables
fir = np.zeros(CHUNK * 2)
fir[:(2 * CHUNK)] = 1.
fir /= fir.sum()
fir_last = fir
avg_freq_buffer = np.zeros(CHUNK)
obj = -np.inf
t = 10
# initialize sample buffer
buffer = np.zeros(CHUNK * 2)
try:
while True:
# read audio
string_audio_data = stream.read(CHUNK)
audio_data = np.fromstring(string_audio_data, dtype=DTYPE)
normalized_data = audio_data / MAX_INT
freq_data = np.fft.fft(normalized_data)
# synthesize audio
buffer[CHUNK:] = np.random.randn(CHUNK)
freq_buffer = np.fft.fft(buffer)
freq_fir = np.fft.fft(fir)
freq_synth = freq_fir * freq_buffer
synth = np.real(np.fft.ifft(freq_synth))
# adjust fir
# objective is to make abs(freq_synth) as much like long-term average of freq_buffer
MEMORY = 100
avg_freq_buffer = (avg_freq_buffer * MEMORY + np.abs(freq_data)) / (MEMORY + 1)
obj_last = obj
obj = np.real(np.dot(avg_freq_buffer[1:51], np.abs(freq_synth[1:100:2])) / np.dot(freq_synth[1:100:2],
np.conj(freq_synth[1:100:2])))
if obj > obj_last:
fir_last = fir
fir = fir_last.copy()
# adjust filter in frequency space
freq_fir = np.fft.fft(fir)
# t += np.clip(np.random.randint(3)-1, 0, 64)
t = np.random.randint(100)
freq_fir[t] += np.random.randn() * .05
# transform frequency space filter to time space, click-free
fir = np.real(np.fft.ifft(freq_fir))
fir[:CHUNK] *= np.linspace(1., 0., CHUNK) ** .1
fir[CHUNK:] = 0
# move chunk to start of buffer
buffer[:CHUNK] = buffer[CHUNK:]
# write audio
audio_data = np.array(np.round_(synth[CHUNK:] * MAX_INT), dtype=DTYPE)
string_audio_data = audio_data.tostring()
stream.write(string_audio_data, CHUNK)
finally:
stream.stop_stream()
stream.close()
p.terminate()
@Singhal-harsh
Copy link

Singhal-harsh commented Jul 10, 2019

Hey, can you please help me with the following error that I am encountering with your code:

DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
audio_data = np.fromstring(string_audio_data, dtype=DTYPE)
RuntimeWarning: invalid value encountered in true_divide
obj = np.real(np.dot(avg_freq_buffer[1:51], np.abs(freq_synth[1:100:2])) / np.dot(freq_synth[1:100:2], np.conj(freq_synth[1:100:2])))

@PandaWhoCodes
Copy link
Author

The code is pretty old and should work despite the warnings.
Can you give the full stack trace for me to be able to help you better

@Singhal-harsh
Copy link

Singhal-harsh commented Jul 10, 2019

import pyaudio
import numpy as np
import scipy.signal
import wave

CHUNK = 1024

FORMAT = pyaudio.paInt16

WIDTH = 2
DTYPE = np.int16
MAX_INT = 32768.0

CHANNELS = 1
RATE = 16000
RECORD_SECONDS = 7

WAVE_OUTPUT_FILENAME = "Speech.wav"

j = np.complex(0,1)


p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(WIDTH),
                channels=CHANNELS,
                rate=RATE,
                input=True,
                output=True,
                frames_per_buffer=CHUNK)

print("* recording")

# initialize filter variables
fir = np.zeros(CHUNK * 2)
fir[:(2*CHUNK)] = 1.
fir /= fir.sum()

fir_last = fir
avg_freq_buffer = np.zeros(CHUNK)
obj = -np.inf
t = 10

# initialize sample buffer
buffer = np.zeros(CHUNK * 2)

#for i in np.arange(RATE / CHUNK * RECORD_SECONDS):
while True:
    # read audio
    frames = []
    string_audio_data = stream.read(CHUNK)
    audio_data = np.fromstring(string_audio_data, dtype=DTYPE)
    normalized_data = audio_data / MAX_INT
    freq_data = np.fft.fft(normalized_data)

    # synthesize audio
    buffer[CHUNK:] = np.random.randn(CHUNK)
    freq_buffer = np.fft.fft(buffer)
    freq_fir = np.fft.fft(fir)
    freq_synth = freq_fir * freq_buffer
    synth = np.real(np.fft.ifft(freq_synth))

    # adjust fir
    # objective is to make abs(freq_synth) as much like long-term average of freq_buffer
    MEMORY=100
    avg_freq_buffer = (avg_freq_buffer*MEMORY + \
                           np.abs(freq_data)) / (MEMORY+1)
    obj_last = obj

    obj = np.real(np.dot(avg_freq_buffer[1:51], np.abs(freq_synth[1:100:2])) / np.dot(freq_synth[1:100:2], np.conj(freq_synth[1:100:2])))
    if obj > obj_last:
        fir_last = fir
    fir = fir_last.copy()

    # adjust filter in frequency space
    freq_fir = np.fft.fft(fir)
    #t += np.clip(np.random.randint(3)-1, 0, 64)
    t = np.random.randint(100)
    
    freq_fir[t] += np.random.randn()*.05

    # transform frequency space filter to time space, click-free
    fir = np.real(np.fft.ifft(freq_fir))
    fir[:CHUNK] *= np.linspace(1., 0., CHUNK)**.1
    fir[CHUNK:] = 0


    # move chunk to start of buffer
    buffer[:CHUNK] = buffer[CHUNK:]
    
    # write audio
    audio_data = np.array(np.round_(synth[CHUNK:] * MAX_INT), dtype=DTYPE)
    string_audio_data = audio_data.tostring()
    frames.append(string_audio_data)
    stream.write(string_audio_data, CHUNK)

print("* done")

stream.stop_stream()
stream.close()

p.terminate()

waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(p.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()

^Above are the changes I made in your code for my use.
If not this, Can you help me with noise reduction through python on wav file, I have to use it for speech recognition, and due to background noise, accuracy of my speech recognition takes a dive.
Anyways, Following is the error I get when I try to run the above code:


runfile('C:/Users/hs45858/NoiseReduction.py', wdir='C:/Users/hs45858')
* recording
C:/Users/hs45858/NoiseReduction.py:58: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
  audio_data = np.fromstring(string_audio_data, dtype=DTYPE)
C:/Users/hs45858/NoiseReduction.py:76: RuntimeWarning: invalid value encountered in true_divide
  obj = np.real(np.dot(avg_freq_buffer[1:51], np.abs(freq_synth[1:100:2])) / np.dot(freq_synth[1:100:2], np.conj(freq_synth[1:100:2])))
Traceback (most recent call last):

  File "<ipython-input-5-4d0a5b75ddd0>", line 1, in <module>
    runfile('C:/Users/hs45858/NoiseReduction.py', wdir='C:/Users/hs45858')

  File "C:\Program Files\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
    execfile(filename, namespace)

  File "C:\Program Files\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "C:/Users/hs45858/NoiseReduction.py", line 101, in <module>
    stream.write(string_audio_data, CHUNK)

  File "C:\Users\hs45858\AppData\Local\conda\conda\envs\harsh\Lib\site-packages\pyaudio.py", line 586, in write
    exception_on_underflow)

KeyboardInterrupt

@juzdepeche
Copy link

juzdepeche commented Aug 18, 2019

Your code is trying to divide by 0. Add this (numpy as np) :
np.seterr(divide='ignore', invalid='ignore')

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment