Neeraj Khanchandani nk773

## generateData.R
generate <- function(mu=0, sigma=1, dist="normal", nocols=5, norows=100, min=0, max=1, sd=3000) {
  set.seed(sd)
  if (dist == "uniform") {
    df <- replicate(nocols,runif(norows)*(min+(max-min)))
    return(df)
  }
  if (dist == "normal") {
    df <- replicate(nocols,rnorm(norows, mu, sigma)*(min+(max-min)))
    return (df)
  }

## Concatenate.R
concatenate<-function(dataset1, colrng, sep=" ") {

  rng <- which(names(dataset1)%in%colrng)
  if (length(rng)<2) {
    print(paste("Insufficient columns for concatenation", length(rng), rng))
    return(dataset1)
  }
  df<-dataset1[,-rng]

  df3<-apply(dataset1[,rng],1, paste, collapse=sep)

## addstats.R
addMeanFeature<-function(dataset1, colrng, bycol=NA, windowsize){
  library(zoo)
  id<-bycol
  #print(paste("id =", id))
  rollingmean = c()
  rollingsd = c()
  rng <- which(names(dataset1)%in%colrng)
  a = paste("a",(1:length(rng)),sep="") # average
  sd =paste("sd",(1:length(rng)),sep="") # standard deviation


## TextToColumns.R
TextToCols<-function(dataset1, dataset2 = NA, selcol, splch, incorig) {
  if (length(selcol)>1)
  {

    scol<-selcol[1]
  }
  else
  {
    scol<-selcol
  }

## evaluateForecast.R
# Map 1-based optional input ports to variables
library(forecast)
masefun <- function(observed, predicted){
  error = 0;
  if (length(observed) != length(predicted)) {
    return (NA);
  } else if (length(observed) == 0 || length(predicted) == 0) {
    return (NA);
  }
  else {

## forecast.R
fsscore<-function(model, numPeriodsToForecast) {
  if (numPeriodsToForecast<=0)
  {
    print("ERROR: forecast doesn't have any unknow value as time attribute present in training data")
    return(data.frame(NA))
  }
  else
  {

    forecastedData <- forecast(model, h=numPeriodsToForecast)

## cmp.R
compareData<-function(dataset1, dataset2)
{
  # Compares two datasets cell by cell. If the dimensions of the datasets differ, it uses the minimum dimension
  # Args:
  #   dataset1: first dataset to be compared.
  #   dataset2: second dataset to be compared.
  #
  # Returns:
  #   Returns list of the following
  #       dataframe containing boolean matrix for each cell indicating match or no match

## nestedlag.R
lf2 <- function (dataset2, valcol2, numfeats, skps){
  # Computes the lag features for specific column specified by valcol2 within dataset2
  # This creates a total of numfeats new features and assumes the dataset is already sorted by time
  # Args:
  #   dataset2: dataset to be used for adding new features.
  #   valcol2:  index of the column for which we need to create new features.
  #   numfeats: number of features to create. For example, if this is 3, then it will create 3 new lag features
  #   skps:     creates nested lag features, using this array as the skipping interval for each level
  # Returns:
  #   New dataset with additional features

## lag.R
lf1 <- function (dataset2, valcol2, numfeats){
  # Computes the lag features for specific column specified by valcol2 within dataset2
  # This creates a total of numfeats new features and assumes the dataset is already sorted by time
  # Args:
  #   dataset2: dataset to be used for adding new features.
  #   valcol2:  index of the column for which we need to create new features.
  #   numfeats: number of features to create. For example, if this is 3, then it will create 3 new lag features
  #
  # Returns:
  #   New dataset with additional features

## gist:febb753b87db00c628f1dbc0e86de6c6
#!/bin/bash

if [ $# -lt 3 ]
then
   echo "Number of arguments doesn't have required mandatory parameters"
   echo "azureml_rrs <PathToRRSjsonFile> <API key> <RRS URL>"
   echo "Path to RRS json file    $1"
   echo "API Key                  $2"
   echo "RRS URL                  $3"
   echo "Total Arguments          $#"
	generate <- function(mu=0, sigma=1, dist="normal", nocols=5, norows=100, min=0, max=1, sd=3000) {
	set.seed(sd)
	if (dist == "uniform") {
	df <- replicate(nocols,runif(norows)*(min+(max-min)))
	return(df)
	}
	if (dist == "normal") {
	df <- replicate(nocols,rnorm(norows, mu, sigma)*(min+(max-min)))
	return (df)
	}
	concatenate<-function(dataset1, colrng, sep=" ") {

	rng <- which(names(dataset1)%in%colrng)
	if (length(rng)<2) {
	print(paste("Insufficient columns for concatenation", length(rng), rng))
	return(dataset1)
	}
	df<-dataset1[,-rng]

	df3<-apply(dataset1[,rng],1, paste, collapse=sep)
	addMeanFeature<-function(dataset1, colrng, bycol=NA, windowsize){
	library(zoo)
	id<-bycol
	#print(paste("id =", id))
	rollingmean = c()
	rollingsd = c()
	rng <- which(names(dataset1)%in%colrng)
	a = paste("a",(1:length(rng)),sep="") # average
	sd =paste("sd",(1:length(rng)),sep="") # standard deviation
	TextToCols<-function(dataset1, dataset2 = NA, selcol, splch, incorig) {
	if (length(selcol)>1)
	{

	scol<-selcol[1]
	}
	else
	{
	scol<-selcol
	}
	# Map 1-based optional input ports to variables
	library(forecast)
	masefun <- function(observed, predicted){
	error = 0;
	if (length(observed) != length(predicted)) {
	return (NA);
	} else if (length(observed) == 0 || length(predicted) == 0) {
	return (NA);
	}
	else {
	fsscore<-function(model, numPeriodsToForecast) {
	if (numPeriodsToForecast<=0)
	{
	print("ERROR: forecast doesn't have any unknow value as time attribute present in training data")
	return(data.frame(NA))
	}
	else
	{

	forecastedData <- forecast(model, h=numPeriodsToForecast)
	compareData<-function(dataset1, dataset2)
	{
	# compares two datasets for each cell value. If dimensions of dataset differ, it uses minimum dimension
	# Args:
	# dataset1: first dataset to be compared.
	# dataset2: second dataset to be compared.
	#
	# Returns:
	# Returns list of the following
	# dataframe containing boolean matrix for each cell indicating match or no match
	lf2 <- function (dataset2, valcol2, numfeats, skps){
	# Computes the lag features for specific column specified by valcol2 within dataset2
	# This creates total numfeats new features and it assumes dataset is already sorted by time
	# Args:
	# dataset2: dataset to be used for adding new features.
	# valcol2: index of the column for which we need to create new features.
	# numfeats: number of features to create. For example, if this is 3, then it will create 3 new lag features
	# skps: created nested lag features by using this array as skipping interval for each level
	# Returns:
	# New dataset with additional features
	lf1 <- function (dataset2, valcol2, numfeats){
	# Computes the lag features for specific column specified by valcol2 within dataset2
	# This creates total numfeats new features and it assumes dataset is already sorted by time
	# Args:
	# dataset2: dataset to be used for adding new features.
	# valcol2: index of the column for which we need to create new features.
	# numfeats: number of features to create. For example, if this is 3, then it will create 3 new lag features
	#
	# Returns:
	# New dataset with additional features
	#!/bin/bash

	if [ $# -lt 3 ]
	then
	echo "Number of arguments doesn't have required mandatory parameters"
	echo "azureml_rrs <PathToRRSjsonFile> <API key> <RRS URL>"
	echo "Path to RRS json file $1"
	echo "API Key $2"
	echo "RRS URL $3"
	echo "Total Arguments $#"