coppelia machine learning and analytics coppeliaMLA

## finSim.R
#First we are going to set up probability distributions for our beliefs about the inputs
#We've been told ARPU is about £7 and it's very unlikely to be higher than £10 or lower than £4
#So we'll go for a normal distribution centred at 7 with 2.5% and 97.5% quantiles at 4 and 10

#Show how we get the variance

# Standard deviation implied by the belief: the half-width of 3 around the
# centre is divided by 1.96 (approximately the 97.5% standard-normal quantile).
arpu.sd <- 3 / 1.96

# Evaluate the normal density with mean 7 on a grid from 0 to 15 in steps of 0.5.
x <- seq(from = 0, to = 15, by = 0.5)
d <- dnorm(x, mean = 7, sd = arpu.sd)

# Draw the density as a line.
plot(x, d, type = "l")

## dfToJSON.R
#Load libraries

library(rjson)
library(stringr)


dfToJSON<-function(df, mode='vector'){

  colToList<-function(x, y){


## ArethereAnyGood5LetterDomains.py
'''
Created on Feb 6, 2014

@author: sraper
'''

import itertools, urllib, urllib2, time, re, random
from bs4 import BeautifulSoup

def catchURL(queryURL): # Nicked this from someone. Afraid I can't remember who. Sorry

## trimFirstLine.py
import os

# Directory whose files will each have their first line removed.
# NOTE: `dir` shadows the built-in of the same name; the name is kept because
# it is this script's existing module-level variable.
dir = 'put your director in here'

# Rewrite every regular file in the directory without its first line.
for filename in os.listdir(dir):
    # os.path.join works whether or not the directory ends with a separator.
    path = os.path.join(dir, filename)
    # Subdirectories and other non-file entries cannot be rewritten; skip them.
    if not os.path.isfile(path):
        continue
    with open(path, 'r') as fin:
        # splitlines(True) keeps each line's original terminator.
        data = fin.read().splitlines(True)
    with open(path, 'w') as fout:
        fout.writelines(data[1:])

## joinTables.sql
    -- Clean slate: drop these three tables by name if they already exist, so
    -- that a rerun of the script starts from scratch.
    drop table if exists recommender_set_num; --In case you need to rerun the script
    drop table if exists person_ids_full_names;
    drop table if exists recom_names;

    -- Set up a table to load the recommendations data into
    create external table if not exists recommender_set_num
    (
     userID     bigint,
     itemID     bigint
    ) row format delimited fields terminated by ','

## cubicSplineExample.R
# Simulate noisy observations of sin(x / 4) on a grid from 1 to 10.
x <- seq(1, 10, by = 0.1)
# One noise draw per grid point: length(x) replaces the hard-coded 91 so the
# noise vector stays the same length as x if the grid is ever changed.
y <- sin(x / 4) + rnorm(length(x), mean = 0, sd = 0.05) # Sine function plus noise
plot(x, y)


#Knots at 2, 4 and 6
# Polynomial terms of the cubic basis.
x2 <- x^2
x3 <- x^3
# Truncated cubic terms: zero up to the knot, (x - knot)^3 beyond it.
k1 <- (x > 2) * (x - 2)^3
k2 <- (x > 4) * (x - 4)^3

## csvToPipe.py
import os, csv

progDir = '/pathToFolderContainingCSVs/'


for filename in os.listdir(progDir):
    if filename != '.DS_Store':
        with open(progDir+filename, 'rb') as csvfile:
            progReader = csv.reader(csvfile, delimiter=',', quotechar='"')

## binDiff.R
# Binomial probability mass that returns 0 outright when k exceeds n.
#
# k, n and p are passed to dbinom() as its first three arguments
# (number of successes, number of trials, success probability).
# Returns dbinom(k, n, p) when k <= n, and 0 otherwise.
modBin <- function(k, n, p) {
  # Guard clause: k beyond n short-circuits to zero without calling dbinom.
  if (k > n) {
    return(0)
  }
  dbinom(k, n, p)
}

## clusterSankey.R
# Values the clustering loop iterates over: 20, 25, ..., 50
s <- seq(from = 20, to = 50, by = 5)

# Object for recording clusters; starts out as NULL
clus.change <- NULL

#Cycle through the clustering solutions
for (i in s){

  hc <- hclust(dist(USArrests[1:i,]), "ave")

## DendToForce.R
# Average-linkage hierarchical clustering of the first 40 rows of USArrests,
# on the distances returned by dist() with its default settings.
# The method name is spelled out in full instead of relying on hclust()
# resolving the abbreviation "ave".
hc <- hclust(dist(USArrests[1:40, ]), method = "average")

#Function for extracting nodes and links
extractGraph<-function(hc){

  n<-length(hc$order)
  m<-hc$merge

  links<-data.frame(source=as.numeric(), target=as.numeric(), value=as.numeric())
	#First we are going to set up probability distributions for our beliefs about the inputs
	#We've been told ARPU is about £7 and it's very unlikely to be higher than £10 or lower than £4
	#So we'll go for a normal distribution centred at 7 with 2.5% and 97.5% quantiles at 4 and 10

	#Show how we get the variance

	# Standard deviation implied by the belief: the half-width of 3 around the
	# centre is divided by 1.96 (approximately the 97.5% standard-normal quantile).
	arpu.sd <- 3 / 1.96

	# Evaluate the normal density with mean 7 on a grid from 0 to 15 in steps of 0.5.
	x <- seq(from = 0, to = 15, by = 0.5)
	d <- dnorm(x, mean = 7, sd = arpu.sd)

	# Draw the density as a line.
	plot(x, d, type = "l")
	#Load libraries

	library(rjson)
	library(stringr)


	dfToJSON<-function(df, mode='vector'){

	colToList<-function(x, y){
	'''
	Created on Feb 6, 2014

	@author: sraper
	'''

	import itertools, urllib, urllib2, time, re, random
	from bs4 import BeautifulSoup

	def catchURL(queryURL): # Nicked this from someone. Afraid I can't remember who. Sorry
	import os

	# Directory whose files will each have their first line removed.
	# NOTE: `dir` shadows the built-in of the same name; the name is kept because
	# it is this script's existing variable.
	dir = 'put your director in here'

	# Rewrite every regular file in the directory without its first line.
	# The nesting of the loop and the two `with` blocks is restored here; the
	# flattened original was not valid Python.
	for filename in os.listdir(dir):
		# os.path.join works whether or not the directory ends with a separator.
		path = os.path.join(dir, filename)
		# Subdirectories and other non-file entries cannot be rewritten; skip them.
		if not os.path.isfile(path):
			continue
		with open(path, 'r') as fin:
			# splitlines(True) keeps each line's original terminator.
			data = fin.read().splitlines(True)
		with open(path, 'w') as fout:
			fout.writelines(data[1:])
	-- Clean slate: drop these three tables by name if they already exist, so
	-- that a rerun of the script starts from scratch.
	drop table if exists recommender_set_num; --In case you need to rerun the script
	drop table if exists person_ids_full_names;
	drop table if exists recom_names;

	-- Set up a table to load the recommendations data into
	create external table if not exists recommender_set_num
	(
	userID bigint,
	itemID bigint
	) row format delimited fields terminated by ','
	# Simulate noisy observations of sin(x / 4) on a grid from 1 to 10.
	x <- seq(1, 10, by = 0.1)
	# One noise draw per grid point: length(x) replaces the hard-coded 91 so the
	# noise vector stays the same length as x if the grid is ever changed.
	y <- sin(x / 4) + rnorm(length(x), mean = 0, sd = 0.05) # Sine function plus noise
	plot(x, y)


	#Knots at 2, 4 and 6
	# Polynomial terms of the cubic basis.
	x2 <- x^2
	x3 <- x^3
	# Truncated cubic terms: zero up to the knot, (x - knot)^3 beyond it.
	k1 <- (x > 2) * (x - 2)^3
	k2 <- (x > 4) * (x - 4)^3
	import os, csv

	progDir = '/pathToFolderContainingCSVs/'



	for filename in os.listdir(progDir):
	if filename != '.DS_Store':
	with open(progDir+filename, 'rb') as csvfile:
	progReader = csv.reader(csvfile, delimiter=',', quotechar='"')
	# Binomial probability mass that returns 0 outright when k exceeds n.
	#
	# k, n and p are passed to dbinom() as its first three arguments
	# (number of successes, number of trials, success probability).
	# Returns dbinom(k, n, p) when k <= n, and 0 otherwise.
	modBin <- function(k, n, p) {
	  # Guard clause: k beyond n short-circuits to zero without calling dbinom.
	  if (k > n) {
	    return(0)
	  }
	  dbinom(k, n, p)
	}
	# Values the clustering loop iterates over: 20, 25, ..., 50
	s <- seq(from = 20, to = 50, by = 5)

	# Object for recording clusters; starts out as NULL
	clus.change <- NULL

	#Cycle through the clustering solutions
	for (i in s){

	hc <- hclust(dist(USArrests[1:i,]), "ave")
	# Average-linkage hierarchical clustering of the first 40 rows of USArrests,
	# on the distances returned by dist() with its default settings.
	# The method name is spelled out in full instead of relying on hclust()
	# resolving the abbreviation "ave".
	hc <- hclust(dist(USArrests[1:40, ]), method = "average")

	#Function for extracting nodes and links
	extractGraph<-function(hc){

	n<-length(hc$order)
	m<-hc$merge

	links<-data.frame(source=as.numeric(), target=as.numeric(), value=as.numeric())