Krishnan Raman (krishnanraman)

# Draw n replicates; for each one, take myn samples from a noncentral
# t distribution (df = mydf, ncp = myncp) and record sum(samp) - min(samp).
tsum <- function(n, myn, mydf, myncp) {
  mylist <- c()
  for (i in 1:n) {
    samp <- rt(n = myn, df = mydf, ncp = myncp)
    mylist <- c(mylist, sum(samp) - min(samp))
  }
  return(mylist)
}
x <- tsum(1000, 250, 3, 1)
import numpy as np
from lmfit import Minimizer, Parameters, report_fit
# create data to be fitted
x = np.linspace(0, 15, 301)
data = 2*x*x + 3*x + 4
# define objective function: returns the array to be minimized
# maximize abc subject to a + b + c = 10
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
abc = tf.get_variable("abc", shape=(3,1), dtype=tf.float32, initializer=tf.ones_initializer)
optimizer = tf.train.GradientDescentOptimizer(0.0001)
# grab a, b, c and the lambda l
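For reference, assuming a, b, c > 0 (as the all-ones initialization suggests), the constrained maximum has a closed form: by AM-GM, abc <= ((a+b+c)/3)^3 = (10/3)^3 = 1000/27, roughly 37.04, attained at a = b = c = 10/3. That is a handy sanity check for whatever value the gradient-descent loop converges to.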
krishnanraman / result.txt
Last active February 10, 2018 00:10
Get all leaves of the DecisionTree (then construct a spline through the leaf nodes to build f(x) => y)
// Exiting paste mode, now interpreting.
id = 8, isLeaf = true, predict = 0.0 (prob = -1.0), impurity = 0.0, split = None, stats = None
id = 9, isLeaf = true, predict = 1.4736842105263157 (prob = -1.0), impurity = 0.2493074792243767, split = None, stats = None
id = 10, isLeaf = true, predict = 3.0 (prob = -1.0), impurity = 0.16666666666666666, split = None, stats = None
id = 11, isLeaf = true, predict = 4.1 (prob = -1.0), impurity = 0.09000000000000057, split = None, stats = None
id = 12, isLeaf = true, predict = 5.0 (prob = -1.0), impurity = 0.0, split = None, stats = None
id = 13, isLeaf = true, predict = 6.444444444444445 (prob = -1.0), impurity = 0.2469135802469143, split = None, stats = None
id = 14, isLeaf = true, predict = 7.923076923076923 (prob = -1.0), impurity = 0.2248520710059158, split = None, stats = None
id = 15, isLeaf = true, predict = 9.0 (prob = -1.0), impurity = 0.0, split = None, stats = None
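The lines above are the Node.toString output for the leaves only. A minimal sketch of how such leaves might be collected from an old-MLlib DecisionTreeModel (assuming a trained model named model; the spline fit itself is left out):

import org.apache.spark.mllib.tree.model.{DecisionTreeModel, Node}

// Recursively walk the tree, keeping only the leaf nodes.
def leaves(node: Node): Seq[Node] =
  if (node.isLeaf) Seq(node)
  else node.leftNode.toSeq.flatMap(leaves) ++ node.rightNode.toSeq.flatMap(leaves)

// val leafNodes = leaves(model.topNode)   // model: a trained DecisionTreeModel
// leafNodes.foreach(println)              // prints lines like the ones above
// val xy = leafNodes.map(n => (n.id, n.predict.predict))  // points to spline through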
krishnanraman / output.txt
Created February 8, 2018 02:57
Create Spark DataFrame From List
+---+---+
| x| y|
+---+---+
| 1| 0|
| 2| 0|
| 3| 0|
| 4| 0|
| 5| 0|
| 6| 0|
| 7| 0|
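The table above is df.show() output. A minimal sketch of one way to build such a DataFrame from a List in spark-shell (assumes Spark 2.x, where spark.implicits._ is already in scope):

val xy = (1 to 7).toList.map(x => (x, 0))   // List[(Int, Int)]
val df = xy.toDF("x", "y")
df.show()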
// Given 2 points in first quadrant
// Point p1 (a,b)
// Point p2 (c,d)
// a,b,c,d > 0
//
// Find point X(x,0) on x axis
// such that
// ||p1-X|| + ||p2-X|| is smallest
//
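One way to solve it is the standard reflection trick; a minimal sketch (the function name bestX is mine, not the gist's):

// Reflect p2 across the x axis to p2' = (c, -d). For any X on the x axis,
// ||p2 - X|| = ||p2' - X||, so the sum is minimized when X lies on the
// straight segment from p1 to p2', i.e. at x = (a*d + b*c) / (b + d).
def bestX(a: Double, b: Double, c: Double, d: Double): Double =
  (a * d + b * c) / (b + d)

// e.g. p1 = (1,1), p2 = (3,1) gives bestX = 2.0, the midpoint by symmetry.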
krishnanraman / ideas.txt
Created February 2, 2018 00:06
ideas.txt
Idea: a supervised ANN classifier mapping distribution data => (distribution name, params)
e.g.
Poisson(5) data 1000 samples => ("poisson", 5)
Uniform(10) data 1000 samples => ("uniform", 10)
Normal(mu, sig) data 1000 samples => ("normal", mu, sig)
etc.
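A minimal sketch of the data-generation half of this idea, with the ANN itself left out (the Example case class and the Knuth-style Poisson sampler are my own scaffolding, not from the gist):

import scala.util.Random

case class Example(samples: Array[Double], label: String, params: Seq[Double])

val rng = new Random(42)

// Knuth's method: count uniform draws until their running product falls below exp(-lambda).
def poisson(lambda: Double): Int = {
  val limit = math.exp(-lambda)
  var k = 0
  var p = 1.0
  while (p > limit) { k += 1; p *= rng.nextDouble() }
  k - 1
}

// 1000 samples per distribution, labeled with (name, params) for supervised training.
val training = Seq(
  Example(Array.fill(1000)(poisson(5).toDouble), "poisson", Seq(5.0)),
  Example(Array.fill(1000)(rng.nextDouble() * 10), "uniform", Seq(10.0)),
  Example(Array.fill(1000)(rng.nextGaussian() * 2.0 + 1.0), "normal", Seq(1.0, 2.0))
)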
>spark-shell
import org.apache.spark.sql.SQLContext
val sql = new SQLContext(sc)
sql.read.format("orc").load("./test.orc").schema.foreach(println)
krishnanraman / dow.scala
Last active January 30, 2018 00:57
Render a timeseries jpg using JFreeChart
import org.jfree.chart.{ChartFactory, ChartUtilities}
import org.jfree.data.time.{Day, TimeSeries, TimeSeriesCollection}
import scala.io.Source
import java.io.File
val dow = new TimeSeries("dow")
Source.fromFile("./DJI.csv").getLines.toStream.tail.foreach{
  x: String =>
    val arr = x.split(",")
    val day = Day.parseDay(arr(0))
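    // The gist preview cuts off here; a hedged completion sketch follows.
    // (The close-price column index and the chart labels are assumptions.)
    val close = arr(4).toDouble
    dow.addOrUpdate(day, close)
}

val chart = ChartFactory.createTimeSeriesChart("dow", "day", "close",
  new TimeSeriesCollection(dow), false, false, false)
ChartUtilities.saveChartAsJPEG(new File("dow.jpg"), chart, 1200, 600)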
krishnanraman / testmultiproc.py
Created December 13, 2017 01:43
Multiprocessing with Python
import multiprocessing as mp
import numpy as np
import sys
def printf(format, *args):
    sys.stdout.write(format % args)

def f(x):
    sum = 0
    for i in np.arange(x, 2*x, 1):