Martin Chan martinctc

## Data2Worksheets.vbs
Sub parse_data()
Dim lr As Long
Dim ws As Worksheet
Dim vcol, i As Integer
Dim icol As Long
Dim myarr As Variant
Dim title As String
Dim titlerow As Integer

'This macro splits data into multiple worksheets based on the values found in a column of the Excel sheet.

## approx_num.R
#' @title Convert a numeric value into a natural language approximation string
#'
#' @description
#' This function takes a numeric value and returns a string that approximates the value in natural language.
#'
#' @param x A numeric value.
#'
#' @examples
#' approx_num(0.5)
#' # [1] "increased by a half"

## test-python-rf-runtime.py
# data cleaning and utility
import numpy as np
import pandas as pd
import vivainsights as vi
import os

# timing code
import time
import random
import sys

## get-pypi-stats.py
import requests
import pandas as pd

# Package whose download statistics are requested from pypistats.org.
package_name = "vivainsights"
api_endpoint = f"https://pypistats.org/api/packages/{package_name}/overall"

# Bound the request: requests applies no timeout unless one is given, so a
# stalled connection would otherwise block this script indefinitely.
response = requests.get(api_endpoint, timeout=30)

if response.status_code == 200:
    data = response.json()

## ForceNetwork_example.R
library(tidyverse)
library(networkD3)

## Nodes data frame describing all the nodes in the network
## The first entry in nodes dataframe is node 0, the next entry is node 1 and so on.
## The nodes dataframe must be sorted according to this sequence.
## This is the only way to tie the nodes dataframe to the links dataframe.
TestNodes <- data.frame(name = c("Alpha",
                                 "Beta",
                                 "Cat",

## power-analysis.R
# See <https://rpubs.com/mbounthavong/sample_size_power_analysis_R>

library(pwr)

# Sample size estimation for comparing two proportions.
# `pwr::ES.h()` turns the two proportions into the effect size `h`.
# `n` is left unspecified, so `pwr.2p.test()` solves for the sample size.

p0 <- pwr.2p.test(
  h = ES.h(p1 = 0.60, p2 = 0.50),
  sig.level = 0.05,
  power = 0.80
)
plot(p0)

## power-analysis.py
# estimate sample size via power analysis
from statsmodels.stats.power import TTestIndPower

# parameters for power analysis
effect = 0.8
alpha = 0.05
power = 0.8

# perform power analysis
analysis = TTestIndPower()

## str_arrange.R
#' Sort the letters of each string into alphabetical order
#'
#' Vectorised over `x`: every element is split into its individual
#' characters, the characters are sorted with `sort()`, and the result is
#' pasted back together into a single string.
#'
#' @param x A character vector.
#' @return A character vector with one sorted string per element of `x`.
#' @examples
#' str_arrange(c("hello", "world"))
#' # [1] "ehllo" "dlorw"
str_arrange <- function(x) {
  letters_by_string <- stringr::str_split(x, "") # Split string into letters

  # vapply() always returns a character vector (also for empty input) and,
  # unlike the pipe and `as_vector()`, needs no package to be attached.
  vapply(
    letters_by_string,
    function(chars) paste(sort(chars), collapse = ""), # Sort and re-combine
    character(1)
  )
}

## rank_by_group.R
#' @title
#' Rank a data frame by grouping variable using base R
#'
#' @description
#' This function ranks a specified column in a data frame by group using entirely base R functions.
#' The underlying function is `rank()`, where additional arguments can be passed with `...`.
#' The grouping variable is specified as a string using the argument `group_var`, and the variable to rank is
#' specified using the argument `rank_var`. The operation is analogous to using `group_by()` followed by
#' `mutate()` in {dplyr}.
#' See example below using the base dataset `iris`.

## repeat rows based on n
# Expand a data frame so that each row is repeated according to its weight.

# `x` holds the labels and `y` the number of copies wanted for each row.
wtest <- data.frame(
  x = c("cats", "dogs", "birds", "cats"),
  y = c(1, 2, 3, 2)
)

# Repeat every row index `y` times, then subset by the expanded index.
wtest[rep(seq_len(nrow(wtest)), times = wtest[["y"]]), ]
	Sub parse_data()
	Dim lr As Long
	Dim ws As Worksheet
	Dim vcol, i As Integer
	Dim icol As Long
	Dim myarr As Variant
	Dim title As String
	Dim titlerow As Integer

	'This macro splits data into multiple worksheets based on the values found in a column of the Excel sheet.
	#' @title Convert a numeric value into a natural language approximation string
	#'
	#' @description
	#' This function takes a numeric value and returns a string that approximates the value in natural language.
	#'
	#' @param x A numeric value.
	#'
	#' @examples
	#' approx_num(0.5)
	#' # [1] "increased by a half"
	# data cleaning and utility
	import numpy as np
	import pandas as pd
	import vivainsights as vi
	import os

	# timing code
	import time
	import random
	import sys
	import requests
	import pandas as pd

	# Package whose download statistics are requested from pypistats.org.
	package_name = "vivainsights"
	api_endpoint = f"https://pypistats.org/api/packages/{package_name}/overall"

	# Bound the request: requests applies no timeout unless one is given, so a
	# stalled connection would otherwise block this script indefinitely.
	response = requests.get(api_endpoint, timeout=30)

	if response.status_code == 200:
	data = response.json()
	library(tidyverse)
	library(networkD3)

	## Nodes data frame describing all the nodes in the network
	## The first entry in nodes dataframe is node 0, the next entry is node 1 and so on.
	## The nodes dataframe must be sorted according to this sequence.
	## This is the only way to tie the nodes dataframe to the links dataframe.
	TestNodes <- data.frame(name = c("Alpha",
	"Beta",
	"Cat",
	# See <https://rpubs.com/mbounthavong/sample_size_power_analysis_R>

	library(pwr)

	# Sample size estimation for comparing two proportions.
	# `pwr::ES.h()` turns the two proportions into the effect size `h`.
	# `n` is left unspecified, so `pwr.2p.test()` solves for the sample size.

	p0 <- pwr.2p.test(
	  h = ES.h(p1 = 0.60, p2 = 0.50),
	  sig.level = 0.05,
	  power = 0.80
	)
	plot(p0)
	# estimate sample size via power analysis
	from statsmodels.stats.power import TTestIndPower

	# parameters for power analysis
	effect = 0.8
	alpha = 0.05
	power = 0.8

	# perform power analysis
	analysis = TTestIndPower()
	#' Sort the letters of each string into alphabetical order
	#'
	#' Vectorised over `x`: every element is split into its individual
	#' characters, the characters are sorted with `sort()`, and the result is
	#' pasted back together into a single string.
	#'
	#' @param x A character vector.
	#' @return A character vector with one sorted string per element of `x`.
	#' @examples
	#' str_arrange(c("hello", "world"))
	#' # [1] "ehllo" "dlorw"
	str_arrange <- function(x) {
	  letters_by_string <- stringr::str_split(x, "") # Split string into letters

	  # vapply() always returns a character vector (also for empty input) and,
	  # unlike the pipe and `as_vector()`, needs no package to be attached.
	  vapply(
	    letters_by_string,
	    function(chars) paste(sort(chars), collapse = ""), # Sort and re-combine
	    character(1)
	  )
	}
	#' @title
	#' Rank a data frame by grouping variable using base R
	#'
	#' @description
	#' This function ranks a specified column in a data frame by group using entirely base R functions.
	#' The underlying function is `rank()`, where additional arguments can be passed with `...`.
	#' The grouping variable is specified as a string using the argument `group_var`, and the variable to rank is
	#' specified using the argument `rank_var`. The operation is analogous to using `group_by()` followed by
	#' `mutate()` in {dplyr}.
	#' See example below using the base dataset `iris`.
	# Expand a data frame so that each row is repeated according to its weight.

	# `x` holds the labels and `y` the number of copies wanted for each row.
	wtest <- data.frame(
	  x = c("cats", "dogs", "birds", "cats"),
	  y = c(1, 2, 3, 2)
	)

	# Repeat every row index `y` times, then subset by the expanded index.
	wtest[rep(seq_len(nrow(wtest)), times = wtest[["y"]]), ]