雲夢 HudsonHuang

🌙
DayDreaming
  • Hong Kong
@HudsonHuang
HudsonHuang / TransLayer.py
Created October 18, 2020 09:50
Transformer for convolution, replacing the SE module
import torch
import math
from torch.nn import functional as F
from torch import nn

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, height, width, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
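The preview above stops inside `__init__`. As a rough sketch of the idea (not the gist's actual continuation), a fixed 2D sinusoidal positional encoding over a `(d_model, height, width)` feature map could look like the following; the class name `PositionalEncoding2D`, the half/half channel split between rows and columns, and the `d_model % 4 == 0` requirement are assumptions of this sketch.

```python
import math
import torch
from torch import nn

class PositionalEncoding2D(nn.Module):
    """Hypothetical continuation: fixed 2D sinusoidal encoding added to a
    (N, C, H, W) feature map, half the channels for rows, half for columns."""

    def __init__(self, d_model, height, width, dropout=0.1):
        super().__init__()
        assert d_model % 4 == 0, "this layout assumes d_model divisible by 4"
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(d_model, height, width)
        d_half = d_model // 2
        div_term = torch.exp(torch.arange(0., d_half, 2) * (-math.log(10000.0) / d_half))
        pos_h = torch.arange(0., height).unsqueeze(1)   # (H, 1)
        pos_w = torch.arange(0., width).unsqueeze(1)    # (W, 1)

        # first half of the channels encodes the row index
        pe[0:d_half:2] = torch.sin(pos_h * div_term).t().unsqueeze(2).expand(-1, height, width)
        pe[1:d_half:2] = torch.cos(pos_h * div_term).t().unsqueeze(2).expand(-1, height, width)
        # second half encodes the column index
        pe[d_half::2] = torch.sin(pos_w * div_term).t().unsqueeze(1).expand(-1, height, width)
        pe[d_half + 1::2] = torch.cos(pos_w * div_term).t().unsqueeze(1).expand(-1, height, width)
        self.register_buffer("pe", pe.unsqueeze(0))      # (1, C, H, W)

    def forward(self, x):                                 # x: (N, C, H, W)
        return self.dropout(x + self.pe)
```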
@HudsonHuang
HudsonHuang / spectral_feature_compare.md
Last active December 31, 2019 03:43
spectral_feature_compare

| Feature | nnAudio | torch.stft | kapre | torchaudio | tf.signal (or else tf. stuff) | torch-stft | librosa |
| --- | --- | --- | --- | --- | --- | --- | --- |
| Trainable | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
| ModelConvert* | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
| Speed (need test**) | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Differentiable (not sure**) | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| Mel | 1 | 0 | 1 | 1 | 1 | 0 | 1 |
| MFCC | 0 | 0 | 0 | 1 | 1 | 0 | 1 |
| CQT | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| GPU support | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
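For context only, here is what computing the same feature (a mel spectrogram) looks like in two of the compared libraries; the snippet is illustrative and is not where the table's entries come from.

```python
import numpy as np
import torch
import torchaudio
import librosa

sr = 16000
y = np.random.randn(sr).astype(np.float32)           # 1 s of dummy audio

# librosa: NumPy in, NumPy out (CPU only, not differentiable)
mel_np = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=1024, hop_length=256, n_mels=80)

# torchaudio: a torch.nn.Module, so it can run on GPU and is differentiable
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=sr, n_fft=1024, hop_length=256, n_mels=80)
mel_pt = mel_transform(torch.from_numpy(y))
```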

@HudsonHuang
HudsonHuang / model_activation_visualization.py
Last active December 20, 2019 11:24
View activations of any PyTorch model
import PIL
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
def get_image(path, imsize=-1):
    """Load an image and resize it to a specific size.
@HudsonHuang
HudsonHuang / benchmark.py
Last active May 11, 2022 19:02
Wave file IO benchmark (soundfile and librosa)
## linux + HDD
# soundfile r: 111.3170223236084
# soundfile w: 21.47102665901184
# librosa r: 27.82967972755432
# librosa w: 24.776712656021118
## windows + SSD
# soundfile r: 12.918063640594482
# soundfile w: 11.726674318313599
# librosa r: 16.433390378952026
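A rough sketch of how such read/write timings can be produced is shown below; the file glob, output names, and use of list comprehensions are assumptions, the librosa write path is omitted because `librosa.output.write_wav` was removed in librosa 0.8, and the numbers above come from the original gist, not from this snippet.

```python
import time
import glob
import librosa
import soundfile as sf

wav_files = glob.glob("data/*.wav")   # hypothetical test set

t0 = time.time()
clips = [sf.read(p) for p in wav_files]             # (data, samplerate) pairs
print("soundfile r:", time.time() - t0)

t0 = time.time()
for i, (data, sr) in enumerate(clips):
    sf.write(f"out_sf_{i}.wav", data, sr)
print("soundfile w:", time.time() - t0)

t0 = time.time()
clips = [librosa.load(p, sr=None) for p in wav_files]   # sr=None keeps the native rate
print("librosa r:", time.time() - t0)
```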
@HudsonHuang
HudsonHuang / audio_format.py
Created October 11, 2019 15:21
Convert PCM16/float32 audio data to bytes, and vice versa.
"""Helper functions for working with audio files in NumPy."""
"""some code borrowed from https://github.com/mgeier/python-audio/blob/master/audio-files/utility.py"""
import numpy as np
import contextlib
import librosa
import struct
import soundfile
def float_to_byte(sig):
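The preview ends at the first helper. A minimal sketch of the two conversions named in the description (float32 to PCM16 bytes and back) is below; the clipping and the 32767/32768 scaling are this sketch's choices, and the original gist follows mgeier's utility functions, which may differ.

```python
import numpy as np

def float_to_pcm16_bytes(sig):
    """float32 in [-1, 1] -> little-endian int16 bytes."""
    sig = np.clip(np.asarray(sig, dtype=np.float32), -1.0, 1.0)
    return (sig * 32767).astype("<i2").tobytes()

def pcm16_bytes_to_float(buf):
    """little-endian int16 bytes -> float32 in [-1, 1]."""
    return np.frombuffer(buf, dtype="<i2").astype(np.float32) / 32768.0

raw = float_to_pcm16_bytes(np.sin(np.linspace(0, 3.14, 100)))
back = pcm16_bytes_to_float(raw)
```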
@HudsonHuang
HudsonHuang / do_ifft_using_fft.py
Created June 26, 2019 09:41
do ifft using fft
from scipy.fftpack import fft
import numpy as np
# random complex array of N = 4
a = np.array([11.+11.4j, 2.47+2.3j, 30.89+73.1j, 44.+16.3j])
print(a)
# apply the FFT to a to transform it to the frequency domain
da = fft(a)
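The preview stops after the forward FFT. The identity the title refers to, ifft(X) = conj(fft(conj(X))) / N, can be checked directly; this sketch uses NumPy's ifft as the reference and is not necessarily the gist's continuation.

```python
import numpy as np
from scipy.fftpack import fft

X = np.array([11. + 11.4j, 2.47 + 2.3j, 30.89 + 73.1j, 44. + 16.3j])

def ifft_via_fft(X):
    # inverse DFT expressed through the forward DFT
    N = len(X)
    return np.conj(fft(np.conj(X))) / N

assert np.allclose(ifft_via_fft(X), np.fft.ifft(X))
```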
@HudsonHuang
HudsonHuang / grad.py
Created June 26, 2019 07:56
Gradient of the DFT
# First-, second- and n-th-order derivatives via the FFT: https://math.mit.edu/~stevenj/fft-deriv.pdf
# Derivative of the DFT: https://math.stackexchange.com/a/1658364/684858
import tensorflow as tf
import numpy as np
import torch
from torch.autograd import gradcheck, Variable
# mag loss
def mag(x):
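A minimal sketch of gradient-checking an FFT-based magnitude loss, in the spirit of the `gradcheck` import above; it uses `torch.fft.fft` (PyTorch 1.7+) rather than whatever FFT API the original 2019 code called, and the function name `mag_loss` is this sketch's own.

```python
import torch
from torch.autograd import gradcheck

def mag_loss(x):
    # magnitude spectrum, summed to a scalar so gradcheck has a simple output
    return torch.abs(torch.fft.fft(x)).sum()

# gradcheck needs double precision inputs with requires_grad=True
x = torch.randn(8, dtype=torch.double, requires_grad=True)
print(gradcheck(mag_loss, (x,), eps=1e-6, atol=1e-4))
```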
@HudsonHuang
HudsonHuang / mixup.py
Last active December 2, 2022 00:29
mixup in numpy, tensorflow(keras), and pytorch
import numpy as np
import tensorflow as tf

def mixup_np(features, labels, alpha=0.1):
    # numpy version
    num_examples = features.shape[0]
    num_class = labels.shape[-1]
    mix = np.random.beta(alpha, alpha, size=[num_examples])
    features = np.swapaxes(features, 0, -1)
    # pair example i with example N-1-i along the (now last) example axis
    features = features * mix + features[..., ::-1] * (1.0 - mix)
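The preview only shows the NumPy version. A short sketch of a PyTorch variant (the gist's actual torch code may differ) that pairs examples via a random permutation of the batch and assumes one-hot labels:

```python
import torch

def mixup_torch(features, labels, alpha=0.1):
    # per-example mixing coefficients from a Beta(alpha, alpha) distribution
    lam = torch.distributions.Beta(alpha, alpha).sample((features.size(0),))
    lam = lam.to(features.device)
    index = torch.randperm(features.size(0), device=features.device)
    # reshape lam so it broadcasts over the non-batch dimensions
    lam_f = lam.view(-1, *([1] * (features.dim() - 1)))
    lam_l = lam.view(-1, *([1] * (labels.dim() - 1)))
    mixed_x = lam_f * features + (1 - lam_f) * features[index]
    mixed_y = lam_l * labels + (1 - lam_l) * labels[index]
    return mixed_x, mixed_y
```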
@HudsonHuang
HudsonHuang / kaldi_install.sh
Last active June 15, 2019 15:03
kaldi_install.sh
git clone https://github.com/kaldi-asr/kaldi.git kaldi --origin upstream
cd kaldi/tools
make -j 4
# NOTE: If an error occurs, run this command and apt-get install the necessary packages:
# sh ./extras/check_dependencies.sh
cd ../src/
./configure --shared
make depend -j 4
make -j 4
./run.sh
@HudsonHuang
HudsonHuang / Pytorch performance guide.md
Last active March 25, 2020 02:43
Pytorch performance guide
  1. Using CUDA the right way:
  • Deterministic convolutions: set the seed of every operation to 0 so runs can be reproduced (this makes training slower) via torch.backends.cudnn.deterministic; see https://oldpan.me/archives/pytorch-conmon-problem-in-training

    Use torch.cuda.get_device_name and torch.cuda.get_device_capability to query the device, for example:

    torch.cuda.get_device_name(0) -> 'Quadro GP100'
    torch.cuda.get_device_capability(0) -> (6, 0)
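A minimal sketch putting the two points above together: forcing deterministic cuDNN behaviour for reproducibility (at some speed cost) and querying the GPU.

```python
import torch

torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False   # benchmark mode picks the fastest, possibly non-deterministic kernels

if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))          # e.g. 'Quadro GP100'
    print(torch.cuda.get_device_capability(0))    # e.g. (6, 0)
```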