Lissa Hyacinth lissahyacinth

## regexp_extract.R
# regexp_extract - Replacement for base regexp_matching in R
#
# Uses default matching for regular expressions, but supports capture groups.
# Main use is as an alternative to awkward gsub-based matching for pulling out features.
#
# regexp_extract(pattern = '([0-9])([a-z])',
#                 text = '3k2n2k',
#                 perl = TRUE,
#                 group = 1)
# > 3

## array_sort_by.sql
-- LINES 1 - 4: TRANSFORM( to x -> ELEMENT_AT
-- The TRANSFORM orders the array by a transform of the array.
-- Channel Array is of form A#T :: String, where T :: Integral
-- RESULT: An ordered array :: Integral, which will form our keys for the second section.

-- LINES 5 - 11: ELEMENT_AT( to END
-- We now take our keys one by one, and use them in ELEMENT_AT(mapping, key) to generate a new array.
-- As our keys are in the right order, our end array will also be. (Duplicate keys will cause a missing hit.)
-- The other lines generate the actual map.
-- RESULT: An ordered array.

## select_multi.R
# Select the columns of `df` whose names appear in `cols`.
#
# df:   a data.frame or data.table.
# cols: character vector of column names to keep.
#
# Columns come back in the order they occur in `df`, not the order of `cols`;
# names in `cols` that are not columns of `df` are ignored.
#
# Returns a table of the same kind as `df` holding only the matching columns.
# A single matching column is returned as a one-column table instead of being
# collapsed to a bare vector.
select_multi <- function(df, cols) {
  keep <- which(colnames(df) %in% cols)
  if (inherits(df, "data.table")) {
    # data.table needs with = FALSE to treat `keep` as column positions.
    return(df[, keep, with = FALSE])
  }
  # drop = FALSE stops a single match from degrading to a vector, so the
  # result shape agrees with the data.table branch.
  df[, keep, drop = FALSE]
}

## char_ngram_base.R
# Build the n-gram that starts at the first occurrence of `x` in `y`.
#
# x: the element to start from (a single value).
# y: the vector of tokens to read from.
# n: how many consecutive elements to include (default 2).
#
# Returns the n elements joined by "," as one string, or '' when `x` does not
# occur in `y` or when the n-gram would run past the end of `y`.
ngram <- function(x, y, n = 2) {
  # Use only the first match. Several matches used to give a length > 1 `if`
  # condition (an error in current R) and interleaved output; no match used
  # to fail with "argument is of length zero".
  start <- which(x == y)[1L]
  if (is.na(start) || (start + (n - 2)) >= length(y)) {
    return('')
  }
  paste0(y[start + seq_len(n) - 1L], collapse = ",")
}


## Residuals Mapping (MAPE).R
get_resid_map <- function(model, test_data, test_col){
  prediction <- predict(model, test_data)
  test_data_col = test_data[[test_col]]
  sprintf("MAPE is %s", MAPE(actual = test_data_col, predicted = prediction))
  frame = data.frame(predicted = prediction, actual = test_data_col)
  frame$diff = frame$predicted - frame$actual
  frame$absdiff = abs(frame$predicted - frame$actual)
  return(list(
  mape = sprintf("MAPE is %s", MAPE(actual = test_data_col, predicted = prediction)),
  pva = ggplot(data = frame, aes(x = predicted, y = actual)) + geom_jitter() + ggtitle("Predicted v Actuals"),

## gitparse.bash
# Changing Prompt for Bash
inside_git_repo="\$(git rev-parse --is-inside-work-tree 2>/dev/null)"

# Print the current git branch as " (branch)" for use in a prompt, or an
# empty line when the working directory is not inside a git work tree.
parse_git_branch() {
  # Ask git at call time. The file-level inside_git_repo variable holds the
  # literal, unexpanded text of its command substitution (its "$" is
  # escaped), so testing it never reflected the current directory.
  if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
    # Drop every line that does not start with "*", then rewrite
    # "* name" as " (name)".
    git branch 2> /dev/null | sed -e '/^[^*]/d' -e 's/* \(.*\)/ (\1)/'
  else
    echo ""
  fi
}

## PrestoMLExample.sql
-- Example learning sin(x)

WITH validation_data AS (
    SELECT
    label,
    MAP(ARRAY[feature_label], ARRAY[feature]) AS features
    FROM (
        SELECT
        TRANSFORM(SEQUENCE(1,100), X ->  SIN(CAST( X AS DOUBLE))) AS n_label,
        TRANSFORM(REPEAT(1, 100), X -> CAST(X AS BIGINT)) AS n_feature_label,

## PrestoExample.R
library(RPresto)
library(httr)
library(DBI)

# Utils ####
.request.headers <- function(conn, transaction_id = 'NONE') {
  if(transaction_id == 'NONE'){
    return(httr::add_headers(
      "X-Presto-User"= conn@user,
      "X-Presto-Catalog"= conn@catalog,

## streaming.rs
// Design copied from emk's Rust Streaming (github.com/emk/rust-streaming)
use std::cell::Cell;

/// A view over a borrowed slice of `T` that carries an interior-mutable
/// `usize` cursor alongside a `usize` limit.
/// NOTE(review): how `cur` and `limit` are advanced and checked is not
/// visible in this excerpt — confirm against the impl/macro that use them.
pub struct StreamingView<'a, T> {
        // Cursor stored in a `Cell`, so it can be updated through a shared
        // reference to the view.
        cur: Cell<usize>,
        // presumably the bound that `cur` must not pass — TODO confirm
        limit: usize,
        // The borrowed slice; the view cannot outlive it (lifetime `'a`).
        reference: &'a[T]
    }

    #[macro_export]

## reducable.ml

let reducable a b =
 {code for aA reducing to nothing etc}

let rec solve a x =
if a.length = 0
  then match x with
  # elem :: list_of_elems indicates that x is being split into elem - the head - and list_of_elems - the tail of the list.
  # [] indicates an empty element
    elem :: list_of_elems -> (solve elem list_of_elems)
	# regexp_extract - Replacement for base regexp_matching in R
	#
	# Uses default matching for regular expressions, but supports capture groups.
	# Main use is an alternative to bad gsub matching to pull out features.
	#
	# regexp_extract(pattern = '([0-9])([a-z])',
	# text = '3k2n2k',
	# perl = TRUE,
	# group = 1)
	# > 3
	-- LINES 1 - 4: Transform( to x -> ELEMENT_AT
	-- The TRANSFORM orders the array by a transform of the array.
	-- Channel Array is of form A#T :: String, where T :: Integral
	-- RESULT: An ordered array :: Integral, which will form our keys for the second section.

	-- LINES 5 - 11: ELEMENT_AT( to END
	-- We now take our keys one by one, and use them in ELEMENT_AT(mapping, key) to generate a new array.
	-- As our keys are in the right order, our end array will also be. (Duplicate keys will cause a missing hit.)
	-- The other lines generate the actual map.
	-- RESULT: An ordered array.
	# Select the columns of `df` whose names appear in `cols`.
	#
	# df:   a data.frame or data.table.
	# cols: character vector of column names to keep.
	#
	# Columns come back in the order they occur in `df`, not the order of `cols`;
	# names in `cols` that are not columns of `df` are ignored.
	#
	# Returns a table of the same kind as `df` holding only the matching columns.
	# A single matching column is returned as a one-column table instead of being
	# collapsed to a bare vector.
	select_multi <- function(df, cols) {
	  keep <- which(colnames(df) %in% cols)
	  if (inherits(df, "data.table")) {
	    # data.table needs with = FALSE to treat `keep` as column positions.
	    return(df[, keep, with = FALSE])
	  }
	  # drop = FALSE stops a single match from degrading to a vector, so the
	  # result shape agrees with the data.table branch.
	  df[, keep, drop = FALSE]
	}
	# Build the n-gram that starts at the first occurrence of `x` in `y`.
	#
	# x: the element to start from (a single value).
	# y: the vector of tokens to read from.
	# n: how many consecutive elements to include (default 2).
	#
	# Returns the n elements joined by "," as one string, or '' when `x` does not
	# occur in `y` or when the n-gram would run past the end of `y`.
	ngram <- function(x, y, n = 2) {
	  # Use only the first match. Several matches used to give a length > 1 `if`
	  # condition (an error in current R) and interleaved output; no match used
	  # to fail with "argument is of length zero".
	  start <- which(x == y)[1L]
	  if (is.na(start) || (start + (n - 2)) >= length(y)) {
	    return('')
	  }
	  paste0(y[start + seq_len(n) - 1L], collapse = ",")
	}
	get_resid_map <- function(model, test_data, test_col){
	prediction <- predict(model, test_data)
	test_data_col = test_data[[test_col]]
	sprintf("MAPE is %s", MAPE(actual = test_data_col, predicted = prediction))
	frame = data.frame(predicted = prediction, actual = test_data_col)
	frame$diff = frame$predicted - frame$actual
	frame$absdiff = abs(frame$predicted - frame$actual)
	return(list(
	mape = sprintf("MAPE is %s", MAPE(actual = test_data_col, predicted = prediction)),
	pva = ggplot(data = frame, aes(x = predicted, y = actual)) + geom_jitter() + ggtitle("Predicted v Actuals"),
	# Changing Prompt for Bash
	inside_git_repo="\$(git rev-parse --is-inside-work-tree 2>/dev/null)"

	# Print the current git branch as " (branch)" for use in a prompt, or an
	# empty line when the working directory is not inside a git work tree.
	parse_git_branch() {
	  # Ask git at call time. The file-level inside_git_repo variable holds the
	  # literal, unexpanded text of its command substitution (its "$" is
	  # escaped), so testing it never reflected the current directory.
	  if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
	    # Drop every line that does not start with "*", then rewrite
	    # "* name" as " (name)". The pipe must be unescaped and the sed
	    # expressions need their literal "*" to match git's current-branch marker.
	    git branch 2> /dev/null | sed -e '/^[^*]/d' -e 's/* \(.*\)/ (\1)/'
	  else
	    echo ""
	  fi
	}
	-- Example learning sin(x)

	WITH validation_data AS (
	SELECT
	label,
	MAP(ARRAY[feature_label], ARRAY[feature]) AS features
	FROM (
	SELECT
	TRANSFORM(SEQUENCE(1,100), X -> SIN(CAST( X AS DOUBLE))) AS n_label,
	TRANSFORM(REPEAT(1, 100), X -> CAST(X AS BIGINT)) AS n_feature_label,
	library(RPresto)
	library(httr)
	library(DBI)

	# Utils ####
	.request.headers <- function(conn, transaction_id = 'NONE') {
	if(transaction_id == 'NONE'){
	return(httr::add_headers(
	"X-Presto-User"= conn@user,
	"X-Presto-Catalog"= conn@catalog,
	// Design copied from emk's Rust Streaming (github.com/emk/rust-streaming)
	use std::cell::Cell;

	/// A view over a borrowed slice of `T` that carries an interior-mutable
	/// `usize` cursor alongside a `usize` limit.
	/// NOTE(review): how `cur` and `limit` are advanced and checked is not
	/// visible in this excerpt — confirm against the impl/macro that use them.
	pub struct StreamingView<'a, T> {
	// Cursor stored in a `Cell`, so it can be updated through a shared
	// reference to the view.
	cur: Cell<usize>,
	// presumably the bound that `cur` must not pass — TODO confirm
	limit: usize,
	// The borrowed slice; the view cannot outlive it (lifetime `'a`).
	reference: &'a[T]
	}

	#[macro_export]

	let reducable a b =
	{code for aA reducing to nothing etc}

	let rec solve a x =
	if a.length = 0
	then match x with
	# elem :: list_of_elems indicates that x is being split into elem - the head - and list_of_elems - the tail of the list.
	# [] indicates an empty element
	elem :: list_of_elems -> (solve elem list_of_elems)