David Young dcyoung

## EM_random_initializations.matlab
% Randomly initialize the mixing-probability vector pi, normalized to sum to 1.
% (The mu matrix is not set up in this snippet.)

% pi holds one probability per segment. Picture the image as having been
% generated by drawing every pixel's color from one of "n" normal
% distributions: entry k of pi is the probability that distribution k was the
% one chosen.
% A uniform start would instead be repmat(1/nSegments, nSegments, 1).
% NOTE(review): assigning to pi shadows MATLAB's built-in constant pi for the
% rest of the script.
pi = rand(nSegments, 1);
pi = pi / sum(pi);

## EM_image_segmentation.matlab
% Start from a clean session: clear the workspace and the command window.
clear all;
clc;

% Change the current folder. The path is a placeholder that has to be replaced
% with a real directory before running.
cd('/directoryWithImageNamesGoHere');

% Candidate image names and the segment counts listed for them.
imgNames = {'balloons', 'mountains', 'nature', 'ocean', 'polarlights'};
segmentCounts = [10, 20, 50];

## parallel_vector_addition_cuda_gpu.c
// GPU kernel for the element-wise weighted sum
//   out[i] = A[i] * weight_a + B[i] * weight_b
// Each thread derives a single flat index from its block and thread position
// along x and handles at most that one element. Threads whose index is not
// below len return without touching memory.
__global__ void weightedVecAddKernel(float* out, float* A, float* B, int len, float weight_a, float weight_b) {
	const int idx = blockIdx.x*blockDim.x + threadIdx.x;
	if (idx >= len) {
		return;
	}
	out[idx] = A[idx] * weight_a + B[idx] * weight_b;
}
// compute weighted vector addition on GPU: out = weight_a*A + weight_b*B
void weightedVecAdd(float* out, float* A, float* B, int len, float weight_a, float weight_b) {
	// figures out how to fit computation to the "geometry"

## naive_cpu_vector_addition.c
// CPU version of the weighted vector sum: for every i in [0, len),
//   out[i] = weight_a*A[i] + weight_b*B[i]
// Nothing is written when len <= 0.
void cpuWeightedVectorAdd(float* out, float* A, float* B, int len, float weight_a, float weight_b) {
	int idx = 0;
	while (idx < len) {
		out[idx] = A[idx] * weight_a + B[idx] * weight_b;
		++idx;
	}
}

## naive_cpu_reduction.c
// Sequential CPU reduction: returns the sum of the first len elements of A,
// or 0 when len <= 0.
// The running total is kept in double and rounded to float once at the end.
// With a float accumulator, small addends are dropped as soon as the partial
// sum is large (e.g. 16777216 + 1 stays 16777216 in float).
float reductionCPU(float* A, int len) {
    double total = 0.0;
    for (int i = 0; i < len; i++) {
        total += A[i];
    }
    return (float)total;
}

## parallel_reduction_cuda_gpu.c
// One step of an in-place pairwise reduction over A.
// The thread with flat index t (from its block/thread position along x) owns
// element i = t * 2 * level and adds its partner into it:
//   A[i] = A[i] + A[i + level]
// Threads whose own element or partner element lies outside [0, len) do
// nothing, so a trailing element without a partner is left unchanged.
// NOTE(review): presumably the host launches this once per level with level
// doubling each time — the launcher is not visible here; confirm.
__global__ void reductionKernel(float* A, int len, int level) {
	// 64-bit index math: t * 2 * level can exceed INT_MAX for large levels, and
	// a wrapped int index could pass the bounds check below by accident.
	const long long flat = (long long)blockIdx.x * blockDim.x + threadIdx.x;
	const long long self = flat * 2 * level;
	const long long partner = self + level;
	// The partner must be bounds-checked as well: when len is not a multiple of
	// 2 * level, self can be in range while self + level is past the end.
	if (self < len && partner < len) {
		A[self] = A[self] + A[partner];
	}
}

// Compute reduction of elements in A

## solver.c
#include <string.h>
#include <stdio.h>
#include <math.h>
#include "benchmark.h"
#include <nmmintrin.h>
#include <smmintrin.h>
#include <omp.h>

/**
 * Computes the dot product of 2 vectors.
 * Declaration only; the definition is not part of this snippet.
 * NOTE(review): presumably u and A are the two input vectors and n is their
 * element count — confirm against the definition.
 */
float dotp(float* u, float* A, size_t n);

## unique_identifier.js
// Creates a GUID (Globally Unique Identifier) string laid out as
// 8-4-4-4-12 lowercase hex digits.
// Every digit comes from Math.random(), so no version/variant bits are set and
// the value is not suitable where cryptographic randomness is required.
let createGUID = () => {
    // Four random hex digits: take a value in [0x10000, 0x20000) and drop the
    // leading "1", which keeps any leading zeros in place.
    const hexQuad = () => {
        const value = Math.floor((Math.random() + 1) * 0x10000);
        return value.toString(16).slice(1);
    };
    // Number of four-digit chunks in each dash-separated group.
    const chunksPerGroup = [2, 1, 1, 1, 3];
    return chunksPerGroup
        .map((count) => Array.from({ length: count }, () => hexQuad()).join(''))
        .join('-');
};

## simple_timer.cpp
#include <chrono>
#include <iostream>

class Timer {
  public:
    Timer() : beg_(clock_::now()) {}
    void reset() { beg_ = clock_::now(); }
    double elapsed() const { return std::chrono::duration_cast<second_>(clock_::now() - beg_).count(); }

  private:
	% Randomly initialize the mixing-probability vector pi, normalized to sum to 1.
	% (The mu matrix is not set up in this snippet.)

	% pi holds one probability per segment. Picture the image as having been
	% generated by drawing every pixel's color from one of "n" normal
	% distributions: entry k of pi is the probability that distribution k was the
	% one chosen.
	% A uniform start would instead be repmat(1/nSegments, nSegments, 1).
	% NOTE(review): assigning to pi shadows MATLAB's built-in constant pi for the
	% rest of the script.
	pi = rand(nSegments, 1);
	pi = pi / sum(pi);
	% Start from a clean session: clear the workspace and the command window.
	clear all;
	clc;

	% Change the current folder. The path is a placeholder that has to be replaced
	% with a real directory before running.
	cd('/directoryWithImageNamesGoHere');

	% Candidate image names and the segment counts listed for them.
	imgNames = {'balloons', 'mountains', 'nature', 'ocean', 'polarlights'};
	segmentCounts = [10, 20, 50];
	// GPU kernel for the element-wise weighted sum
	//   out[i] = A[i] * weight_a + B[i] * weight_b
	// Each thread derives a single flat index from its block and thread position
	// along x and handles at most that one element. Threads whose index is not
	// below len return without touching memory.
	__global__ void weightedVecAddKernel(float* out, float* A, float* B, int len, float weight_a, float weight_b) {
		const int idx = blockIdx.x*blockDim.x + threadIdx.x;
		if (idx >= len) {
			return;
		}
		out[idx] = A[idx] * weight_a + B[idx] * weight_b;
	}
	// compute weighted vector addition on GPU: out = weight_a*A + weight_b*B
	void weightedVecAdd(float* out, float* A, float* B, int len, float weight_a, float weight_b) {
	// figures out how to fit computation to the "geometry"
	// CPU version of the weighted vector sum: for every i in [0, len),
	//   out[i] = weight_a*A[i] + weight_b*B[i]
	// Nothing is written when len <= 0.
	void cpuWeightedVectorAdd(float* out, float* A, float* B, int len, float weight_a, float weight_b) {
		int idx = 0;
		while (idx < len) {
			out[idx] = A[idx] * weight_a + B[idx] * weight_b;
			++idx;
		}
	}
	// Sequential CPU reduction: returns the sum of the first len elements of A,
	// or 0 when len <= 0.
	// The running total is kept in double and rounded to float once at the end.
	// With a float accumulator, small addends are dropped as soon as the partial
	// sum is large (e.g. 16777216 + 1 stays 16777216 in float).
	float reductionCPU(float* A, int len) {
		double total = 0.0;
		for (int i = 0; i < len; i++) {
			total += A[i];
		}
		return (float)total;
	}
	// One step of an in-place pairwise reduction over A.
	// The thread with flat index t (from its block/thread position along x) owns
	// element i = t * 2 * level and adds its partner into it:
	//   A[i] = A[i] + A[i + level]
	// Threads whose own element or partner element lies outside [0, len) do
	// nothing, so a trailing element without a partner is left unchanged.
	// NOTE(review): presumably the host launches this once per level with level
	// doubling each time — the launcher is not visible here; confirm.
	__global__ void reductionKernel(float* A, int len, int level) {
		// 64-bit index math: t * 2 * level can exceed INT_MAX for large levels, and
		// a wrapped int index could pass the bounds check below by accident.
		const long long flat = (long long)blockIdx.x * blockDim.x + threadIdx.x;
		const long long self = flat * 2 * level;
		const long long partner = self + level;
		// The partner must be bounds-checked as well: when len is not a multiple of
		// 2 * level, self can be in range while self + level is past the end.
		if (self < len && partner < len) {
			A[self] = A[self] + A[partner];
		}
	}

	// Compute reduction of elements in A
	#include <string.h>
	#include <stdio.h>
	#include <math.h>
	#include "benchmark.h"
	#include <nmmintrin.h>
	#include <smmintrin.h>
	#include <omp.h>

	/**
	 * Computes the dot product of 2 vectors.
	 * Declaration only; the definition is not part of this snippet.
	 * NOTE(review): presumably u and A are the two input vectors and n is their
	 * element count — confirm against the definition.
	 */
	float dotp(float* u, float* A, size_t n);
	// Creates a GUID (Globally Unique Identifier) string laid out as
	// 8-4-4-4-12 lowercase hex digits.
	// Every digit comes from Math.random(), so no version/variant bits are set and
	// the value is not suitable where cryptographic randomness is required.
	let createGUID = () => {
		// Four random hex digits: take a value in [0x10000, 0x20000) and drop the
		// leading "1", which keeps any leading zeros in place.
		const hexQuad = () => {
			const value = Math.floor((Math.random() + 1) * 0x10000);
			return value.toString(16).slice(1);
		};
		// Number of four-digit chunks in each dash-separated group.
		const chunksPerGroup = [2, 1, 1, 1, 3];
		return chunksPerGroup
			.map((count) => Array.from({ length: count }, () => hexQuad()).join(''))
			.join('-');
	};
	#include <chrono>
	#include <iostream>

	class Timer {
	public:
	Timer() : beg_(clock_::now()) {}
	void reset() { beg_ = clock_::now(); }
	double elapsed() const { return std::chrono::duration_cast<second_>(clock_::now() - beg_).count(); }

	private: