Skip to content

Instantly share code, notes, and snippets.

# Training configuration for the ResNet model.
learning_rate = 2e-4  # base rate given to Adam; lr_decay also reads this global
epochs = 50
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet_model.parameters(), lr=learning_rate)

# Loss histories start empty; presumably appended to by the training loop.
resnet_train_losses, resnet_valid_losses = [], []
def lr_decay(optimizer, epoch):
    """Step the learning rate down by a factor of ten every tenth epoch.

    When ``epoch`` is a multiple of 10, the new rate is the module-level
    ``learning_rate`` divided by ``10 ** (epoch // 10)`` and is handed to
    ``setlr``. On every other epoch nothing is changed.

    Args:
        optimizer: Optimizer object; forwarded to ``setlr`` on decay epochs.
        epoch: Integer epoch index.

    Returns:
        Whatever ``setlr`` returns on decay epochs, otherwise the
        ``optimizer`` argument unchanged. Returning it on every path lets
        callers safely write ``optimizer = lr_decay(optimizer, epoch)``.
    """
    if epoch % 10 == 0:
        # epoch 0 also lands here: 10 ** 0 == 1, so the base rate is re-applied.
        new_lr = learning_rate / (10 ** (epoch // 10))
        # setlr is defined outside this block; presumably it returns the
        # updated optimizer -- TODO confirm against its definition.
        optimizer = setlr(optimizer, new_lr)
    return optimizer
from torchvision.models import resnet34
import torch
import torch.nn as nn
import torch.optim as optim
# Use the first CUDA device when CUDA is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Load ResNet-34 with pretrained weights, then replace its final
# fully-connected layer with a fresh 512 -> 50 linear layer.
resnet_model = resnet34(pretrained=True)
resnet_model.fc = nn.Linear(in_features=512, out_features=50)
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
class ESC50Data(Dataset):
    """Dataset that keeps a dataframe alongside sample and label containers.

    NOTE(review): only the constructor is visible in this block; ``data``,
    ``labels`` and ``c2i`` are created empty and nothing here fills them.
    """

    def __init__(self, base, df, in_col, out_col):
        """Store ``df`` and create the empty containers.

        ``base``, ``in_col`` and ``out_col`` are accepted but not used by
        the code visible in this block.
        """
        self.df = df
        self.data, self.labels = [], []
        self.c2i = dict()  # presumably a category -> index lookup; confirm
def spec_to_image(spec, eps=1e-6):
    """Rescale a spectrogram array to 8-bit values in the range 0..255.

    The input is first standardised (mean subtracted, divided by
    ``std + eps``), then min-max scaled to ``[0, 255]`` and cast to
    ``numpy.uint8``.

    Args:
        spec: NumPy array holding the spectrogram values.
        eps: Small constant added to the standard deviation so the
            standardisation step never divides by zero.

    Returns:
        A ``numpy.uint8`` array with the same shape as ``spec``. If every
        value in ``spec`` is identical, an all-zero array is returned.
    """
    mean = spec.mean()
    std = spec.std()
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()

    value_range = spec_max - spec_min
    if value_range == 0:
        # A constant input would give 0/0 = NaN below, and casting NaN to
        # uint8 is undefined; return a well-defined blank image instead.
        return np.zeros(spec_norm.shape, dtype=np.uint8)

    spec_scaled = 255 * (spec_norm - spec_min) / value_range
    return spec_scaled.astype(np.uint8)
import numpy as np
import librosa
def get_melspectrogram_db(file_path, sr=None, n_fft=2048, hop_length=512, n_mels=128, fmin=20, fmax=8300, top_db=80):
wav,sr = librosa.load(file_path,sr=sr)
if wav.shape[0]<5*sr:
wav=np.pad(wav,int(np.ceil((5*sr-wav.shape[0])/2)),mode='reflect')
else:
wav=wav[:5*sr]
spec=librosa.feature.melspectrogram(wav, sr=sr, n_fft=n_fft,
hop_length=hop_length,n_mels=n_mels,fmin=fmin,fmax=fmax)