neilslater/gist:5476045

## gistfile1.rb
require 'narray'
require 'fftw3'
require 'raiff'
require 'rmagick'

# This class models audio power spectrum generation from audio sources (as .aiff files)
# An instance of this class represents a single audio clip and its spectrogram
class AudioSpectrum
  # TODO: Over time, we want to improve on these crude hard-coded values, and either take them
  # as params (e.g. number of windows, min/max frequency to store), or read from the audio file
  # In addition, we probably need to work with files large enough that slurping them in one
  # go is too much. There should be a slurp max size.
  SAMPLE_TIME = 0.0005
  SAMPLE_FREQ = 2000.0
  SAMPLE_MAX = 128 * 256
  SAMPLE_SCALE = 1.0 / SAMPLE_MAX
  NSAMPLES = 4000
  # This window and "height" takes us to half Nyquist limit, or 500Hz, for the spectrogram
  DEFAULT_FFT_WINDOW_SIZE = 256
  DEFAULT_WINDOW_STEP_SIZE = 32
  DEFAULT_NUM_WINDOWS = 64

  DEFAULT_LOWEST_FREQ = 0
  DEFAULT_HIGHEST_FREQ = 64

  PI = Math.acos(0.0) * 2

  @@hann_window = nil
  @@bh_window = nil
  @@diamond = nil

  # file must be a mono, 2kHz, 2s audio clip in .aiff format. More formats later (maybe)
  def initialize file = nil
    # @offsets, @audio and @spectrogram all assume that the entire audio is processed at once,
    # we probably want something longer-term that handles just parts of audio streams. Probably a fixed
    # offset in samples or time . . .
    @fft_window_size = DEFAULT_FFT_WINDOW_SIZE
    @sample_start = 0
    @sample_jitter = 1.0
    @window_step_size = DEFAULT_WINDOW_STEP_SIZE
    @window_count = DEFAULT_NUM_WINDOWS

    @lowest_freq = DEFAULT_LOWEST_FREQ
    @highest_freq = DEFAULT_HIGHEST_FREQ

    @window_type = :hann

    @audio = NArray.float( NSAMPLES )
    if file
      raise "File not found #{file}" unless File.exists?(file)
      load_audio file
    end

    @normalise_type = :whole
  end

  attr_accessor :fft_window_size

  # the sample at which to start the first FFT window, defaults to 0
  attr_accessor :sample_start

  # size of random offset to each sample, expressed in multiples of smallest same size, default 1.0
  # a value of 1.0 should reduce systemic noise caused by limited bit depth (at the expense of introducing white noise)
  attr_accessor :sample_jitter

  # number of samples offset for each window with frequency breakdown, default 32. Floats are allowed
  attr_accessor :window_step_size

  # number of samples offset for each window with frequency breakdown, default 64
  attr_accessor :window_count

  # lowest frequency bin to use for the spectrogram, default 0
  attr_accessor :lowest_freq

  # highest frequency bin to use for the spectrogram, default 64
  attr_accessor :highest_freq

  # type of normalisation to use for the spectrogram, default :whole. Can also be :none, :freq or :time
  attr_accessor :normalise_type

  # type of windowing function, choice of :hann or :blackman_harris
  attr_accessor :window_type

  # number of frequencies, default 65
  attr_reader :spectrogram_height

  # an NArray of 4000 audio samples, range from to -1.0 to 1.0
  attr_reader :audio

  # Integer number of audio samples used to create each timeslice of data. Frequency resolution is better with larger windows,
  # but time resolution is worse.
  attr_reader :fft_window_size

  # a 2-dimensional NArray of window_count x spectrogram_height frequencies, giving normalised relative intensity at each frequency.
  def spectrogram
    @spectrogram_height = 1 + @highest_freq - @lowest_freq
    @offsets = calc_offsets @sample_start, @window_step_size, @window_count
    @spectrogram = NArray.float( @window_count, @spectrogram_height )
    normalised_spectrogram
    @spectrogram
  end

  # writes spectrogram to image file, time series left to right, highest frequency at top, one pixel
  # per "bin"
  def write_spectrogram_graphic filename
    spectrogram unless @spectrogram
    width = @offsets.size
    height = @spectrogram_height

    img = Magick::Image.new(width, height)

    (0...width).each do |x|
      (0...height).each do |y|
        pixel_x = x
        pixel_y = @spectrogram_height - 1 - y
        pixel_colour = intensity_color( @spectrogram[x,y] )
        img.pixel_color( pixel_x, pixel_y, "rgb(#{pixel_colour.join(', ')})")
      end
    end
    img.write(filename)
  end

  private

  def intensity_color i
    gamma = 1.0/1.3
    l = 255.0 * ( i ** gamma )
    [ Integer(l), Integer(l), Integer(i * 255) ]
  end

  def load_audio full_path
    pos = 0
    aiff = Raiff.open( full_path )
    aiff.each_sample do |x|
      @audio[pos] = SAMPLE_SCALE * x[0]
      pos += 1
    end
    if @sample_jitter > 0.0
      @audio += jitter_noise
    end

    @audio
  end

  def fft_window window_id
    # Take a @fft_window_size-sized slice to analyse
    win_start = @offsets[window_id]
    win_end = win_start + @fft_window_size
    audio_window = @audio[win_start...win_end]

    # Apply a window function
    if @window_type == :hann
      ensure_hann_window
      audio_window = audio_window * @@hann_window
    elsif @window_type == :blackman_harris
      ensure_blackman_harris_window
      audio_window = audio_window * @@bh_window
    end

    freqs = FFTW3.fft(audio_window, -1)
    base_factor = 2.0/@fft_window_size
    @spectrogram[window_id,0...@spectrogram_height] = freqs[@lowest_freq...@lowest_freq+@spectrogram_height].abs * base_factor
    max = @spectrogram[window_id,0...@spectrogram_height].max

    if @normalise_type == :time
      norm_factor = 1.0/(max + 1.0e-6)
      @spectrogram[window_id,0...@spectrogram_height] *= norm_factor
    end
    max
  end

  def normalised_spectrogram
    max = 0.0
    # Collect data
    (0...@offsets.size).each do |window_id|
      v = fft_window( window_id )
      max = v if v > max
    end
    # Normalise spectrogram, highest value will be ~0.99999
    if @normalise_type == :whole
      norm_factor = 1.0/(max + 1.0e-6)
      @spectrogram *= norm_factor

    elsif @normalise_type == :freq
      (0...@spectrogram_height).each do |freq_id|
        freq_max = @spectrogram[0...@offsets.size,freq_id].max
        norm_factor = 1.0/(freq_max + 1.0e-6)
        @spectrogram[0...@offsets.size,freq_id] *= norm_factor
      end
    elsif @normalise_type == :centre_rect
      calc_weighted_mean_rect
      weighted_mean = (@spectrogram * @@diamond).sum
      norm_factor = 0.25/(weighted_mean + 1.0e-6)
      @spectrogram *= norm_factor
      # This clips the absoltue values
      @spectrogram[@spectrogram.gt 1.0] = 1.0
    end

    @spectrogram
  end

  def freq_samples freq_id
    @spectrogram[0...@offsets.size,freq_id]
  end

  def calc_offsets start, step, count
    (0...count).map do |x|
      (start + x * step).round
    end
  end

  def ensure_hann_window
    return if @@hann_window && @@hann_window.size == @fft_window_size
    @@hann_window = NArray.float( @fft_window_size )
    pi_invw = ( 2.0 * PI )/( @fft_window_size.to_f - 1.0 )
    (0...@fft_window_size).each do |i|
      @@hann_window[i] = 0.5 * (1.0 - Math.cos( i.to_f * pi_invw ))
    end
  end

  def ensure_blackman_harris_window
    return if @@bh_window && @@bh_window.size == @fft_window_size
    @@bh_window = NArray.float( @fft_window_size )
    # pi_invw -> 2pi/(N-1)
    pi_invw = ( 2.0 * PI )/( @fft_window_size.to_f - 1.0 )
    a0 = 0.35875
    a1 = 0.48829
    a2 = 0.14128
    a3 = 0.01168
    (0...@fft_window_size).each do |i|
      @@bh_window[i] = a0 -  a1 * Math.cos( i.to_f * pi_invw ) + a2 * Math.cos( (2 * i).to_f * pi_invw ) - a3 * Math.cos( (3 * i).to_f * pi_invw )
    end
  end

  # Creates a square shape of floats where the sum is 1.0, which can then be used to get a weighted central mean of a spectrogram
  def calc_weighted_mean_rect
    return @@diamond if @@diamond
    @@diamond = NArray.float( @window_count, @spectrogram_height )
    # This is placeholder code for the simplest central rect, covering very roughly the central 1/3 (by area) of the spectrogram
    lowest_window = Integer(@window_count * 0.21)
    highest_window = Integer(@window_count * 0.79) + 1

    lowest_freq = Integer(@spectrogram_height * 0.21)
    highest_freq = Integer(@spectrogram_height * 0.79) + 1

    npixels = (highest_window - lowest_window) * (highest_freq - lowest_freq)
    @@diamond[lowest_window..highest_window,lowest_freq..highest_freq] = 1.0/npixels
  end

  def jitter_noise
    jitter = ( NArray.float( NSAMPLES ).random() - 0.5 ) * ( SAMPLE_SCALE * @sample_jitter )
    jitter
  end

end
	require 'narray'
	require 'fftw3'
	require 'raiff'
	require 'rmagick'

	# This class models audio power spectrum generation from audio sources (as .aiff files)
	# An instance of this class represents a single audio clip and its spectrogram
	class AudioSpectrum
	# TODO: Over time, we want to improve on these crude hard-coded values, and either take them
	# as params (e.g. number of windows, min/max frequency to store), or read from the audio file
	# In addition, we probably need to work with files large enough that slurping them in one
	# go is too much. There should be a slurp max size.
	SAMPLE_TIME = 0.0005
	SAMPLE_FREQ = 2000.0
	SAMPLE_MAX = 128 * 256
	SAMPLE_SCALE = 1.0 / SAMPLE_MAX
	NSAMPLES = 4000
	# This window and "height" takes us to half Nyquist limit, or 500Hz, for the spectrogram
	DEFAULT_FFT_WINDOW_SIZE = 256
	DEFAULT_WINDOW_STEP_SIZE = 32
	DEFAULT_NUM_WINDOWS = 64

	DEFAULT_LOWEST_FREQ = 0
	DEFAULT_HIGHEST_FREQ = 64

	PI = Math.acos(0.0) * 2

	@@hann_window = nil
	@@bh_window = nil
	@@diamond = nil

	# file must be a mono, 2kHz, 2s audio clip in .aiff format. More formats later (maybe)
	def initialize file = nil
	# @offsets, @audio and @spectrogram all assume that the entire audio is processed at once,
	# we probably want something longer-term that handles just parts of audio streams. Probably a fixed
	# offset in samples or time . . .
	@fft_window_size = DEFAULT_FFT_WINDOW_SIZE
	@sample_start = 0
	@sample_jitter = 1.0
	@window_step_size = DEFAULT_WINDOW_STEP_SIZE
	@window_count = DEFAULT_NUM_WINDOWS

	@lowest_freq = DEFAULT_LOWEST_FREQ
	@highest_freq = DEFAULT_HIGHEST_FREQ

	@window_type = :hann

	@audio = NArray.float( NSAMPLES )
	if file
	raise "File not found #{file}" unless File.exists?(file)
	load_audio file
	end

	@normalise_type = :whole
	end

	attr_accessor :fft_window_size

	# the sample at which to start the first FFT window, defaults to 0
	attr_accessor :sample_start

	# size of random offset to each sample, expressed in multiples of smallest same size, default 1.0
	# a value of 1.0 should reduce systemic noise caused by limited bit depth (at the expense of introducing white noise)
	attr_accessor :sample_jitter

	# number of samples offset for each window with frequency breakdown, default 32. Floats are allowed
	attr_accessor :window_step_size

	# number of samples offset for each window with frequency breakdown, default 64
	attr_accessor :window_count

	# lowest frequency bin to use for the spectrogram, default 0
	attr_accessor :lowest_freq

	# highest frequency bin to use for the spectrogram, default 64
	attr_accessor :highest_freq

	# type of normalisation to use for the spectrogram, default :whole. Can also be :none, :freq or :time
	attr_accessor :normalise_type

	# type of windowing function, choice of :hann or :blackman_harris
	attr_accessor :window_type

	# number of frequencies, default 65
	attr_reader :spectrogram_height

	# an NArray of 4000 audio samples, range from to -1.0 to 1.0
	attr_reader :audio

	# Integer number of audio samples used to create each timeslice of data. Frequency resolution is better with larger windows,
	# but time resolution is worse.
	attr_reader :fft_window_size

	# a 2-dimensional NArray of window_count x spectrogram_height frequencies, giving normalised relative intensity at each frequency.
	def spectrogram
	@spectrogram_height = 1 + @highest_freq - @lowest_freq
	@offsets = calc_offsets @sample_start, @window_step_size, @window_count
	@spectrogram = NArray.float( @window_count, @spectrogram_height )
	normalised_spectrogram
	@spectrogram
	end

	# writes spectrogram to image file, time series left to right, highest frequency at top, one pixel
	# per "bin"
	def write_spectrogram_graphic filename
	spectrogram unless @spectrogram
	width = @offsets.size
	height = @spectrogram_height

	img = Magick::Image.new(width, height)

	(0...width).each do \|x\|
	(0...height).each do \|y\|
	pixel_x = x
	pixel_y = @spectrogram_height - 1 - y
	pixel_colour = intensity_color( @spectrogram[x,y] )
	img.pixel_color( pixel_x, pixel_y, "rgb(#{pixel_colour.join(', ')})")
	end
	end
	img.write(filename)
	end

	private

	def intensity_color i
	gamma = 1.0/1.3
	l = 255.0 * ( i ** gamma )
	[ Integer(l), Integer(l), Integer(i * 255) ]
	end

	def load_audio full_path
	pos = 0
	aiff = Raiff.open( full_path )
	aiff.each_sample do \|x\|
	@audio[pos] = SAMPLE_SCALE * x[0]
	pos += 1
	end
	if @sample_jitter > 0.0
	@audio += jitter_noise
	end

	@audio
	end

	def fft_window window_id
	# Take a @fft_window_size-sized slice to analyse
	win_start = @offsets[window_id]
	win_end = win_start + @fft_window_size
	audio_window = @audio[win_start...win_end]

	# Apply a window function
	if @window_type == :hann
	ensure_hann_window
	audio_window = audio_window * @@hann_window
	elsif @window_type == :blackman_harris
	ensure_blackman_harris_window
	audio_window = audio_window * @@bh_window
	end

	freqs = FFTW3.fft(audio_window, -1)
	base_factor = 2.0/@fft_window_size
	@spectrogram[window_id,0...@spectrogram_height] = freqs[@lowest_freq...@lowest_freq+@spectrogram_height].abs * base_factor
	max = @spectrogram[window_id,0...@spectrogram_height].max

	if @normalise_type == :time
	norm_factor = 1.0/(max + 1.0e-6)
	@spectrogram[window_id,0...@spectrogram_height] *= norm_factor
	end
	max
	end

	def normalised_spectrogram
	max = 0.0
	# Collect data
	(0...@offsets.size).each do \|window_id\|
	v = fft_window( window_id )
	max = v if v > max
	end
	# Normalise spectrogram, highest value will be ~0.99999
	if @normalise_type == :whole
	norm_factor = 1.0/(max + 1.0e-6)
	@spectrogram *= norm_factor

	elsif @normalise_type == :freq
	(0...@spectrogram_height).each do \|freq_id\|
	freq_max = @spectrogram[0...@offsets.size,freq_id].max
	norm_factor = 1.0/(freq_max + 1.0e-6)
	@spectrogram[0...@offsets.size,freq_id] *= norm_factor
	end
	elsif @normalise_type == :centre_rect
	calc_weighted_mean_rect
	weighted_mean = (@spectrogram * @@diamond).sum
	norm_factor = 0.25/(weighted_mean + 1.0e-6)
	@spectrogram *= norm_factor
	# This clips the absoltue values
	@spectrogram[@spectrogram.gt 1.0] = 1.0
	end

	@spectrogram
	end

	def freq_samples freq_id
	@spectrogram[0...@offsets.size,freq_id]
	end

	def calc_offsets start, step, count
	(0...count).map do \|x\|
	(start + x * step).round
	end
	end

	def ensure_hann_window
	return if @@hann_window && @@hann_window.size == @fft_window_size
	@@hann_window = NArray.float( @fft_window_size )
	pi_invw = ( 2.0 * PI )/( @fft_window_size.to_f - 1.0 )
	(0...@fft_window_size).each do \|i\|
	@@hann_window[i] = 0.5 * (1.0 - Math.cos( i.to_f * pi_invw ))
	end
	end

	def ensure_blackman_harris_window
	return if @@bh_window && @@bh_window.size == @fft_window_size
	@@bh_window = NArray.float( @fft_window_size )
	# pi_invw -> 2pi/(N-1)
	pi_invw = ( 2.0 * PI )/( @fft_window_size.to_f - 1.0 )
	a0 = 0.35875
	a1 = 0.48829
	a2 = 0.14128
	a3 = 0.01168
	(0...@fft_window_size).each do \|i\|
	@@bh_window[i] = a0 - a1 * Math.cos( i.to_f * pi_invw ) + a2 * Math.cos( (2 * i).to_f * pi_invw ) - a3 * Math.cos( (3 * i).to_f * pi_invw )
	end
	end

	# Creates a square shape of floats where the sum is 1.0, which can then be used to get a weighted central mean of a spectrogram
	def calc_weighted_mean_rect
	return @@diamond if @@diamond
	@@diamond = NArray.float( @window_count, @spectrogram_height )
	# This is placeholder code for the simplest central rect, covering very roughly the central 1/3 (by area) of the spectrogram
	lowest_window = Integer(@window_count * 0.21)
	highest_window = Integer(@window_count * 0.79) + 1

	lowest_freq = Integer(@spectrogram_height * 0.21)
	highest_freq = Integer(@spectrogram_height * 0.79) + 1

	npixels = (highest_window - lowest_window) * (highest_freq - lowest_freq)
	@@diamond[lowest_window..highest_window,lowest_freq..highest_freq] = 1.0/npixels
	end

	def jitter_noise
	jitter = ( NArray.float( NSAMPLES ).random() - 0.5 ) * ( SAMPLE_SCALE * @sample_jitter )
	jitter
	end

	end