Winston Campeau WinstonCampeau

## oos.txt
library(readr)
library(stringr)
# Load the text file without treating the first row as a header, so the
# first column is addressed as X1 below.
oos <- read_csv(file = "Documents/oos.txt", col_names = FALSE)

# Join every entry of column X1 into one string, separated by single spaces.
flatoos <- str_flatten(oos[["X1"]], collapse = " ")

# Split that string back apart on spaces; the result is a list of length one.
splitflatoos <- strsplit(flatoos, split = " ")

# Print the number of characters in the first token.
nchar(splitflatoos[[1]][1])

## gist:1d71eace809fc9a61c60f110bd3c3a5f
# Prompt for a CSV file and load it with its header row.
# stringsAsFactors = FALSE keeps text columns as character vectors, which is
# what substr()/nchar() below need. It replaces the earlier manual workaround
# (File > Import Dataset > From Text (base), with "Strings as Factors"
# deselected).
md <- read.csv(file.choose(), header = TRUE, stringsAsFactors = FALSE)

# Removes leading and trailing numbers from sequences: every cell loses its
# first two and last two characters, one row at a time.
# seq_len() makes the loop a no-op for a file with zero data rows, whereas
# 1:nrow(md) would iterate over c(1, 0).
for (i in seq_len(nrow(md))) {
  md[i, ] <- substr(md[i, ], 3, nchar(md[i, ]) - 2)
}


#Check for even length stem

## gist:f61eb1b97ce463f87e0d3c08a4ebab4d
#Collatz Conjecture
#Will produce graph and number of steps to converge to 1


collatz <- function(start_val, show_me) {
  if(missing(show_me)){
    show_me <- start_val
  }
  n <- start_val
  comp <- c()

## gist:ac6347034cc332968833edc47e387430
#Where seed commences the generation of all future numbers
#number is the number of resultant generated numbers
#show_list allows you to see the list of generated numbers
#raw changes the numbers from strings to numeric (1 = strings, 0 = numeric)

#I used this method to teach students a little about simulation, probabilities, and p-values.
#I have them do the middle square method by hand using a 4-digit seed
#They quickly learn the pitfalls of the MSM and such a small length seed (can easily converge to 0 or cycle -- although sometimes with desirable results!)
#They then use excel or google sheets and the binom.dist function to get a dirty p-value. (Pick the lower count of heads and tails as your successes and then multiply by 2 for two-tailed)
#Students can then comment on the simulation and how to improve it; they discover the concept of probabilities, fair coins, and the utility of statistical testing.

## gist:4912a7ad0f681f780295838d65c0bcd8
CATCHALL <- function(NUM_RESAMPLES, KFOLDS, INDIVIDUAL_PLOTS) {

  if(nargs()==0){
    NUM_RESAMPLES <- 100
    KFOLDS <- 5
    INDIVIDUAL_PLOTS <- 0
  }

  cat("Please select your data; The data must be (X_COLUMN, Y_COLUMN) and csv file type\nIf you did not enter a number of bootstrap resamples, the default is set to 100 - double for jackknife - and defaults to 5 kfolds")
  cat("\n")

## gist:f6057afebb43f3249be40b8aa0d31ac9
# seed requires an even length number, no restrictions on length
# number is the number of numbers you wish to be generated
# raw=1 returns raw data, aka "0429" instead of 429
# this method is horrible, but still often used as a teaching method

MSM <- function(seed, number, raw) {
  options("scipen" = 2*nchar(seed))

  if(missing(number)) {
    number <- 15

## gist:800a55db17c52f460faae71d87f67593
#Generates dictionary of regional sets (id:array). Regional sets can vary in species composition and number of populations (I consider each element a population).

import numpy as np
import random
# Empty containers; presumably populated by the generation code further down
# (not visible here) -- confirm against the full script.
reg_sets = dict()    # regional sets, stored as id -> array per the header comment
global_set = list()
n_set = list()
unique = dict()

#Adjust number of regional sets, number of species, and number of populations in each set

## gist:f27d1d68a3898f5b1b8d3e1bc156f90a
#Three traders F1, F2, and F3 each have a front wheel of a tricycle and three traders R1, R2, and R3 each have one rear wheel of a tricycle.
#To construct a tricycle, two rear wheels and one front wheel are required. The value of a coalition is the number of tricycles it can construct.
# E.g. v(FRR) = 1, v(RR) = 0 v(FR) = 0.

#Let A be the front wheels and B be the rear wheels

# A holds the front-wheel traders (players 1-3), B the rear-wheel traders
# (players 4-6).
A = set(range(1, 4))
B = set(range(4, 7))

# Starts out empty; nothing in the visible code fills it yet.
subset_dictionary = dict()

## gist:d56b4e7ccc26b71dd7087b163df8ba3c
#Code generated with the help of Dr. Brett Stevens @ Carleton University
#input matrices are easily generated by, G.reduced_adjacency_matrix()

#Mutual information is minimized (equal to zero) when p(x,y)=p(x)*p(y), i.e. when the joint probability is equal to the product of the marginals

def mutual_entropy(input_matrix):

    matrix_sum = (matrix([1]*input_matrix.nrows())*input_matrix*(matrix([1]*input_matrix.ncols())).transpose())[0][0]

    row_probabilities = (input_matrix*(matrix([1]*input_matrix.ncols())).transpose()).transpose()[0]

## gist:b714bcfb64206a6aa05fe12c4b2d36db
#Code produced with the help of Dr. Brett Stevens @ Carleton University
#Add all potential missing edges in a bigraph and with weight

#First copy some bigraph G and generate list of known edges

# Work on a copy so the original bigraph G is left untouched.
auxillary_graph = G.copy()
# Keep only the two endpoints of every known edge, dropping anything else
# (e.g. a label) that an edge entry may carry.
edge_list = [(edge[0], edge[1]) for edge in auxillary_graph.edges()]
	library(readr)
	library(stringr)
	# Load the text file without treating the first row as a header, so the
	# first column is addressed as X1 below.
	oos <- read_csv(file = "Documents/oos.txt", col_names = FALSE)

	# Join every entry of column X1 into one string, separated by single spaces.
	flatoos <- str_flatten(oos[["X1"]], collapse = " ")

	# Split that string back apart on spaces; the result is a list of length one.
	splitflatoos <- strsplit(flatoos, split = " ")

	# Print the number of characters in the first token.
	nchar(splitflatoos[[1]][1])
	# Prompt for a CSV file and load it with its header row.
	# stringsAsFactors = FALSE keeps text columns as character vectors, which is
	# what substr()/nchar() below need. It replaces the earlier manual workaround
	# (File > Import Dataset > From Text (base), with "Strings as Factors"
	# deselected).
	md <- read.csv(file.choose(), header = TRUE, stringsAsFactors = FALSE)

	# Removes leading and trailing numbers from sequences: every cell loses its
	# first two and last two characters, one row at a time.
	# seq_len() makes the loop a no-op for a file with zero data rows, whereas
	# 1:nrow(md) would iterate over c(1, 0).
	for (i in seq_len(nrow(md))) {
	  md[i, ] <- substr(md[i, ], 3, nchar(md[i, ]) - 2)
	}


	#Check for even length stem
	#Collatz Conjecture
	#Will produce graph and number of steps to converge to 1


	collatz <- function(start_val, show_me) {
	if(missing(show_me)){
	show_me <- start_val
	}
	n <- start_val
	comp <- c()
	#Where seed commences the generation of all future numbers
	#number is the number of resultant generated numbers
	#show_list allows you to see the list of generated numbers
	#raw changes the numbers from strings to numeric (1 = strings, 0 = numeric)

	#I used this method to teach students a little about simulation, probabilities, and p-values.
	#I have them do the middle square method by hand using a 4-digit seed
	#They quickly learn the pitfalls of the MSM and such a small length seed (can easily converge to 0 or cycle -- although sometimes with desirable results!)
	#They then use excel or google sheets and the binom.dist function to get a dirty p-value. (Pick the lower count of heads and tails as your successes and then multiply by 2 for two-tailed)
	#Students can then comment on the simulation and how to improve it; they discover the concept of probabilities, fair coins, and the utility of statistical testing.
	CATCHALL <- function(NUM_RESAMPLES, KFOLDS, INDIVIDUAL_PLOTS) {

	if(nargs()==0){
	NUM_RESAMPLES <- 100
	KFOLDS <- 5
	INDIVIDUAL_PLOTS <- 0
	}

	cat("Please select your data; The data must be (X_COLUMN, Y_COLUMN) and csv file type\nIf you did not enter a number of bootstrap resamples, the default is set to 100 - double for jackknife - and defaults to 5 kfolds")
	cat("\n")
	# seed requires an even length number, no restrictions on length
	# number is the number of numbers you wish to be generated
	# raw=1 returns raw data, aka "0429" instead of 429
	# this method is horrible, but still often used as a teaching method

	MSM <- function(seed, number, raw) {
	options("scipen" = 2*nchar(seed))

	if(missing(number)) {
	number <- 15
	#Generates dictionary of regional sets (id:array). Regional sets can vary in species composition and number of populations (I consider each element a population).

	import numpy as np
	import random
	reg_sets = {}
	global_set = []
	n_set = []
	unique = {}

	#Adjust number of regional sets, number of species, and number of populations in each set
	#Three traders F1, F2, and F3 each have a front wheel of a tricycle and three traders R1, R2, and R3 each have one rear wheel of a tricycle.
	#To construct a tricycle, two rear wheels and one front wheel are required. The value of a coalition is the number of tricycles it can construct.
	# E.g. v(FRR) = 1, v(RR) = 0 v(FR) = 0.

	#Let A be the front wheels and B be the rear wheels

	A={1,2,3}
	B={4,5,6}

	subset_dictionary = {}
	#Code generated with the help of Dr. Brett Stevens @ Carleton University
	#input matrices are easily generated by, G.reduced_adjacency_matrix()

	#Mutual information is minimized (equal to zero) when p(x,y)=p(x)*p(y), i.e. when the joint probability is equal to the product of the marginals

	def mutual_entropy(input_matrix):

		matrix_sum = (matrix([1]*input_matrix.nrows())*input_matrix*(matrix([1]*input_matrix.ncols())).transpose())[0][0]

		row_probabilities = (input_matrix*(matrix([1]*input_matrix.ncols())).transpose()).transpose()[0]
	#Code produced with the help of Dr. Brett Stevens @ Carleton University
	#Add all potential missing edges in a bigraph and with weight

	#First copy some bigraph G and generate list of known edges

	# Work on a copy so the original bigraph G is left untouched.
	auxillary_graph = G.copy()
	# Keep only the two endpoints of every known edge. Written as a
	# comprehension because the original loop body had lost its indentation,
	# leaving the `for` statement without a body.
	edge_list = [(edge[0], edge[1]) for edge in auxillary_graph.edges()]