This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gpt_index import SimpleDirectoryReader, GPTListIndex, GPTSimpleVectorIndex, LLMPredictor, PromptHelper | |
from langchain.chat_models import ChatOpenAI | |
import gradio as gr | |
import sys | |
import os | |
# Supply the OpenAI API key through the environment. A key that is already
# exported by the shell wins; the placeholder below is only used as a fallback
# so a real secret never has to be committed to source control.
os.environ.setdefault("OPENAI_API_KEY", '#unique_id')
def construct_index(directory_path): | |
max_input_size = 4096 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sounddevice as sd | |
import soundfile as sf | |
# Recording parameters.
samplerate = 16000
duration = 1  # seconds
filename = 'mytest.wav'

# Total number of frames to capture for the requested duration.
num_frames = int(samplerate * duration)

print("start recording")
# Mono capture; blocking=True is passed so the call waits for the recording.
mydata = sd.rec(
    num_frames,
    samplerate=samplerate,
    channels=1,
    blocking=True,
)
print("end")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect the length (in seconds) of every .wav file found under each label
# directory, then plot the distribution of those lengths as a histogram.
duration_of_recordings = []
for label in labels:
    label_dir = audio_path + '/' + label
    waves = [name for name in os.listdir(label_dir) if name.endswith('.wav')]
    for wav in waves:
        sample_rate, samples = wavfile.read(label_dir + '/' + wav)
        # number of samples divided by samples-per-second gives seconds
        seconds = float(len(samples) / sample_rate)
        duration_of_recordings.append(seconds)

durations = np.array(duration_of_recordings)
plt.hist(durations)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load one recording and plot its raw waveform against time in seconds.
audio_path = '../dataset/audio/'
wav_file = audio_path + 'hello/001.wav'
samples, sample_rate = librosa.load(wav_file, sr=16000)

fig = plt.figure(figsize=(14, 8))
ax1 = fig.add_subplot(211)
# Title names the file that was actually loaded.
ax1.set_title('Raw wave of ' + wav_file)
ax1.set_xlabel('Time')
ax1.set_ylabel('Amplitude')

# One timestamp per sample: sample index divided by the sampling rate.
# This keeps the x and y arrays the same length for clips of any duration,
# not only for clips that are exactly one second long.
time_axis = np.arange(len(samples)) / sample_rate
ax1.plot(time_axis, samples)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
#for audio processing | |
import librosa | |
from scipy.io import wavfile | |
import IPython.display as ipd | |
#for visualization via plots | |
import matplotlib.pyplot as plt | |
#matrix related functions | |
import numpy as np |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
#for audio processing | |
import librosa | |
from scipy.io import wavfile | |
import IPython.display as ipd | |
#for visualization via plots | |
import matplotlib.pyplot as plt | |
#matrix related functions | |
import numpy as np |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compute spectral roll-off curves for librosa's bundled example recording.
y, sr = librosa.load(librosa.util.example_audio_file())

# Approximate maximum frequencies with roll_percent=0.85 (default)
rolloff_max = librosa.feature.spectral_rolloff(y=y, sr=sr)

# Approximate minimum frequencies with roll_percent=0.1
rolloff_min = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.1)

# Both results are kept under their own names so the first is no longer
# discarded; `rolloff` still ends up bound to the roll_percent=0.1 result,
# exactly as before.
rolloff = rolloff_min
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compute and display a chromagram for 'audio.wav'.
# The display submodule is imported explicitly because older librosa
# releases do not expose it through a bare `import librosa`.
import librosa.display

x, sr = librosa.load('audio.wav')
ipd.Audio(x, rate=sr)
hop_length = 512
# returns normalized energy for each chroma bin at each frame.
# The signal is passed as `y=` because recent librosa releases accept it as a
# keyword argument only; the keyword form also works on older releases.
chromagram = librosa.feature.chroma_stft(y=x, sr=sr, hop_length=hop_length)
plt.figure(figsize=(15, 5))
librosa.display.specshow(
    chromagram,
    x_axis='time',
    y_axis='chroma',
    hop_length=hop_length,
    cmap='coolwarm',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt
import numpy as np
import librosa
import librosa.display

# Compare the log-frequency spectrogram of a signal before and after
# pre-emphasis filtering.
# The path must be a string literal; the bare name `audio_file.wav` would be
# evaluated as an attribute lookup on an undefined variable (NameError).
y, sr = librosa.load('audio_file.wav', offset=30, duration=10)
y_filt = librosa.effects.preemphasis(y)
# and plot the results for comparison
S_orig = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
S_preemph = librosa.amplitude_to_db(np.abs(librosa.stft(y_filt)), ref=np.max)
# Draw each spectrogram in its own panel so the second does not paint over
# the first on the same axes.
plt.subplot(2, 1, 1)
librosa.display.specshow(S_orig, y_axis='log', x_axis='time')
plt.title('Original signal')
plt.subplot(2, 1, 2)
librosa.display.specshow(S_preemph, y_axis='log', x_axis='time')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def normalize(x, axis=0):
    """Min-max scale ``x`` along ``axis``.

    Thin wrapper around ``sklearn.preprocessing.minmax_scale``: the minimum
    and maximum are taken along ``axis`` and the values are rescaled between
    them using that function's default feature range.
    """
    scaled = sklearn.preprocessing.minmax_scale(x, axis=axis)
    return scaled
# Draw the waveform semi-transparently, then overlay the min-max scaled
# spectral centroid curve in red on the same axes.
librosa.display.waveplot(x, sr=sr, alpha=0.4)
scaled_centroids = normalize(spectral_centroids)
plt.plot(t, scaled_centroids, color='r')
NewerOlder