Stuart Gordon Reid StuartGordonReid

## OverlappingPatterns.py
def overlapping_patterns(self, bin_data: str, pattern_size=9, block_size=1032):
    """
    Note that this description is taken from the NIST documentation [1]
    [1] http://csrc.nist.gov/publications/nistpubs/800-22-rev1a/SP800-22rev1a.pdf

    The focus of the Overlapping Template Matching test is the number of occurrences of pre-specified target
    strings. Both this test and the Non-overlapping Template Matching test of Section 2.7 use an m-bit
    window to search for a specific m-bit pattern. As with the test in Section 2.7, if the pattern is not found,
    the window slides one bit position. The difference between this test and the test in Section 2.7 is that
    when the pattern is found, the window slides only one bit before resuming the search.

## Spectral.py
def spectral(self, bin_data: str):
    """
    Note that this description is taken from the NIST documentation [1]
    [1] http://csrc.nist.gov/publications/nistpubs/800-22-rev1a/SP800-22rev1a.pdf

    The focus of this test is the peak heights in the Discrete Fourier Transform of the sequence. The purpose of
    this test is to detect periodic features (i.e., repetitive patterns that are near each other) in the tested
    sequence that would indicate a deviation from the assumption of randomness. The intention is to detect whether
    the number of peaks exceeding the 95 % threshold is significantly different than 5 %.

## Runs.py
def independent_runs(self, bin_data: str):
    """
    Note that this description is taken from the NIST documentation [1]
    [1] http://csrc.nist.gov/publications/nistpubs/800-22-rev1a/SP800-22rev1a.pdf

    The focus of this test is the total number of runs in the sequence, where a run is an uninterrupted sequence
    of identical bits. A run of length k consists of k identical bits and is bounded before and after with a bit of
    the opposite value. The purpose of the runs test is to determine whether the number of runs of ones and zeros
    of various lengths is as expected for a random sequence. In particular, this test determines whether the
    oscillation between zeros and ones is either too fast or too slow.

## InterbankNetworkMainMethod.py
if __name__ == "__main__":
    # Read one Bank per CSV row and hand the collected banks to InterbankNetwork.
    banks = []
    # csv.reader needs a text-mode file on Python 3: a binary-mode file yields
    # bytes and raises "iterator should return strings, not bytes". newline=''
    # is the mode recommended by the csv module so that it can handle line
    # endings (including newlines embedded in quoted fields) itself.
    with open('#BankData', newline='') as data:
        reader = csv.reader(data)
        for datum in reader:
            # Datum = ['Bank name', 'Assets', 'Deposits']
            # The fields are passed through exactly as csv yields them (strings).
            bank = Bank(datum[0], datum[1], datum[2])
            banks.append(bank)
    interbankNetwork = InterbankNetwork(banks)

## InterbankNetwork.py
# This contains the actual implementation of a simple systemic risk modeller.
# It is essentially a network based computational model of systemic risk.

__author__="stuart"
__date__ ="$22 Jan 2014 9:19:08 PM$"

import csv
import random
from GraphDataStructure import Graph
from GraphDataStructure import Node

## SimpleGraph.py
# Foundation code for a network-based computational model of
# systemic risk in banking networks. These classes are then
# inherited from to create the interbank network itself.

__author__="Stuart Gordon Reid"
__date__ ="$08 Jan 2014 9:06:32 PM$"

# This class encapsulates the code for a Node object. To
# inherit from this class simply say: class Bank(Node):
class Node:

## RegressionAnalysis.py
__author__ = 'Stuart Gordon Reid'

import os as os
import csv as csv
import numpy as np
import scipy as spy
import sklearn as kit
import pandas as pandas
import statsmodels.api as sm
import matplotlib.pyplot as plot

## MonteCarloKMeans.py
def forest_run(dimensions, patterns, pattern_labels, metric='qe', k_up=20, k_down=2, simulations=55, iterations=50):
    """
    A method for watching Forrest Gump run
    :param dimensions: the dimensionality of the data
    :param patterns: the data itself
    :param metric: the quality metric
    :param k_up: the maximum number of clusters
    :param k_down: the minimum number of clusters
    :param simulations: the number of simulations for each k
    :param iterations: the number of iterations for each k-means pass

## KMeansClustering.py
class Clustering:
    """
    An instance of the Clustering is a solution i.e. a particular partitioning of the (heterogeneous) data set into
    homogeneous subsets. For Centroid based clustering algorithms this involves looking at each pattern and assigning
    it to its nearest centroid. This is done by calculating the distance between each pattern and every centroid and
    selecting the one with the smallest distance. Here we are using fractional distance with the default parameters.

    :param d: dimensionality of the input patterns
    :param k: the pre-specified number of clusters & centroids
    :param z: the patterns in the data set

## AdditionaToClustering.py
class Clustering:

   def k_means_clustering(self, n, s=1.0):
        """
        Perform the K-means clustering algorithm on the data for n iterations.

        As described by the original author, each iteration updates the centroids using the mean-shift heuristic
        and then reassigns the patterns to their closest centroids.

        :param n: number of iterations to complete
        :param s: the scaling factor to use when updating the centroids

        NOTE(review): the original docstring ended with the dangling fragment "pick on which has a better solution
        (according to some measure of cluster quality)". The beginning of that sentence is missing, so its intended
        meaning should be confirmed before it is relied on.
        """
	def overlapping_patterns(self, bin_data: str, pattern_size=9, block_size=1032):
	"""
	Note that this description is taken from the NIST documentation [1]
	[1] http://csrc.nist.gov/publications/nistpubs/800-22-rev1a/SP800-22rev1a.pdf

	The focus of the Overlapping Template Matching test is the number of occurrences of pre-specified target
	strings. Both this test and the Non-overlapping Template Matching test of Section 2.7 use an m-bit
	window to search for a specific m-bit pattern. As with the test in Section 2.7, if the pattern is not found,
	the window slides one bit position. The difference between this test and the test in Section 2.7 is that
	when the pattern is found, the window slides only one bit before resuming the search.
	def spectral(self, bin_data: str):
	"""
	Note that this description is taken from the NIST documentation [1]
	[1] http://csrc.nist.gov/publications/nistpubs/800-22-rev1a/SP800-22rev1a.pdf

	The focus of this test is the peak heights in the Discrete Fourier Transform of the sequence. The purpose of
	this test is to detect periodic features (i.e., repetitive patterns that are near each other) in the tested
	sequence that would indicate a deviation from the assumption of randomness. The intention is to detect whether
	the number of peaks exceeding the 95 % threshold is significantly different than 5 %.
	def independent_runs(self, bin_data: str):
	"""
	Note that this description is taken from the NIST documentation [1]
	[1] http://csrc.nist.gov/publications/nistpubs/800-22-rev1a/SP800-22rev1a.pdf

	The focus of this test is the total number of runs in the sequence, where a run is an uninterrupted sequence
	of identical bits. A run of length k consists of k identical bits and is bounded before and after with a bit of
	the opposite value. The purpose of the runs test is to determine whether the number of runs of ones and zeros
	of various lengths is as expected for a random sequence. In particular, this test determines whether the
	oscillation between zeros and ones is either too fast or too slow.
	if __name__ == "__main__":
	banks = []
	with open('#BankData','rb') as data:
	reader = csv.reader(data)
	for datum in reader:
	# Datum = ['Bank name', 'Assets', 'Deposits']
	bank = Bank(datum[0], datum[1], datum[2])
	banks.append(bank)
	interbankNetwork = InterbankNetwork(banks)
	# This contains the actual implementation of a simple systemic risk modeller.
	# It is essentially a network based computational model of systemic risk.

	__author__="stuart"
	__date__ ="$22 Jan 2014 9:19:08 PM$"

	import csv
	import random
	from GraphDataStructure import Graph
	from GraphDataStructure import Node
	# Foundation code for a network-based computational model of
	# systemic risk in banking networks. These classes are then
	# inherited from to create the interbank network itself.

	__author__="Stuart Gordon Reid"
	__date__ ="$08 Jan 2014 9:06:32 PM$"

	# This class encapsulates the code for a Node object. To
	# inherit from this class simply say: class Bank(Node):
	class Node:
	__author__ = 'Stuart Gordon Reid'

	import os as os
	import csv as csv
	import numpy as np
	import scipy as spy
	import sklearn as kit
	import pandas as pandas
	import statsmodels.api as sm
	import matplotlib.pyplot as plot
	def forest_run(dimensions, patterns, pattern_labels, metric='qe', k_up=20, k_down=2, simulations=55, iterations=50):
	"""
	A method for watching Forrest Gump run
	:param dimensions: the dimensionality of the data
	:param patterns: the data itself
	:param metric: the quality metric
	:param k_up: the maximum number of clusters
	:param k_down: the minimum number of clusters
	:param simulations: the number of simulations for each k
	:param iterations: the number of iterations for each k-means pass
	class Clustering:
	"""
	An instance of the Clustering is a solution i.e. a particular partitioning of the (heterogeneous) data set into
	homogeneous subsets. For Centroid based clustering algorithms this involves looking at each pattern and assigning
	it to its nearest centroid. This is done by calculating the distance between each pattern and every centroid and
	selecting the one with the smallest distance. Here we are using fractional distance with the default parameters.

	:param d: dimensionality of the input patterns
	:param k: the pre-specified number of clusters & centroids
	:param z: the patterns in the data set
	class Clustering:

	def k_means_clustering(self, n, s=1.0):
	"""
	This method performs the K-means clustering algorithm on the data for n iterations. This involves updating the
	centroids using the mean-shift heuristic n-times and reassigning the patterns to their closest centroids.
	:param n: number of iterations to complete
	:param s: the scaling factor to use when updating the centroids
	pick on which has a better solution (according to some measure of cluster quality)
	"""