Skip to content

Instantly share code, notes, and snippets.

Avatar

Turning out data tricks since 2006! thistleknot

View GitHub Profile
@thistleknot
thistleknot / laferriere.py
Created Jan 27, 2022
Laferriere Transform
View laferriere.py
dataFrame = pd.DataFrame()
for f in range(0,len(df.iloc[:,0:10].columns)):
column = f
df_scale = x = StandardScaler().fit_transform(df.iloc[:,0:10])
df_scaled = pd.DataFrame(df_scale,columns=df.iloc[:,0:10].columns).set_index(df.index)
#df.describe()
#df_scaled.describe()
#preprocessing.scale(df.iloc[:,0:10], with_mean=True, with_std=True)
View batchParallel.py
ccfs = []
count = 0
#batch parallel
for r in range(0,(len(r_s))):
print(r_s[r])
print((r_s[r]+distance))
print(cleaned.columns[r_s[r]:min(r_s[r]+distance,len(cleaned.columns))])
@thistleknot
thistleknot / season_nonseasonal_undifferencing.py
Last active Jan 7, 2022
Seasonal and non-seasonal undifferencing
View season_nonseasonal_undifferencing.py
def undiff(data, seasonal, nonseasonal, xi):
print("you have to know what xi for which use case you are going to use")
#nonseasonal
if(nonseasonal!=0 and seasonal==0):
temp = np.concatenate([np.array(xi),np.array(data)])
temp_ = diff_inv(temp,1,nonseasonal)
return(temp_[-len(data):])
View zca_svd.R
ZCA_svd <- function(x)
{
internal <- svd(x)
U = internal$u
#print(U)
#Vt = internal$v
Vt = t(internal$v)
#print(Vt)
@thistleknot
thistleknot / undifference.R
Last active Dec 11, 2021
Undifference seasonally (and non-seasonally) differenced data
View undifference.R
# Load the combined data set from the Python-Stock repository on GitHub.
# header=TRUE takes the column names from the first line of the file;
# row.names=1 uses the first column as row names rather than as data.
raw <- read.csv("https://raw.githubusercontent.com/thistleknot/Python-Stock/master/data/combined_set.csv",row.names=1,header=TRUE)
nv_diff_sets <- function(var_of_int,dataset,f_casts)
{
s_=sndif_[which(colnames(raw)==var_of_int)]
d_=ndif_[which(colnames(raw)==var_of_int)]
startRow = which(rownames(raw)==rownames(dataset[1:d_,,drop=FALSE]))
@thistleknot
thistleknot / onTheFlyColumns.R
Last active Dec 5, 2021
Create Columns in R on the fly using lapply (functional programming)
View onTheFlyColumns.R
Create columns on the fly
combo_s <- do.call(cbind,lapply(1:length(sndif_), function(d)
{
if(sndif_[d]*season == 0)
{
temp <- raw[,d,drop=FALSE]
}else
{
temp <- raw[,d,drop=FALSE]
for(dif in 1:sndif_[d])
@thistleknot
thistleknot / BackStepBestSubsetZCAFilter.R
Last active Dec 5, 2021
Use ZCA (vs PCOR) to iterate from least significant correlations to most
View BackStepBestSubsetZCAFilter.R
# Set-up for the filter: two one-row, zero-filled tables with one column per
# column of newDF_t, each carrying newDF_t's column names.
# NOTE(review): presumably sig_table collects significance values and
# signs_table the coefficient signs in code below this fragment -- confirm.
sig_table <- matrix(0, ncol = ncol(newDF_t))
colnames(sig_table) <- colnames(newDF_t)
signs_table <- matrix(0, ncol = ncol(newDF_t))
colnames(signs_table) <- colnames(newDF_t)

# NOTE(review): looks like the p-value cutoff used further down -- confirm.
p_threshold <- 0.05

# Every column name except the first. Negative indexing is used instead of
# 2:length(...) because 2:1 counts backwards: with a single-column newDF_t the
# old form returned c(NA, <first name>) rather than an empty vector.
New_Names <- colnames(newDF_t)[-1]

# Counter starts at zero; whatever increments it is outside this fragment.
iteration <- 0
View appendProxyRepos.sh
#!/bin/bash
proxy="replaceme"
for file in *.repo; do
echo $file
for name in $(grep '\[*\]' $file | sed 's/[][]//g'); do
echo $name
sed "/^\[$name\]/a\{$proxy}" $file | sed 's/[{}]//g' >> temp-$file
done
@thistleknot
thistleknot / rpy2image.py
Created Nov 21, 2021
rpy2 image example
View rpy2image.py
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
# Import R's grDevices package so its graphics-device functions are callable
# from Python.
grdevices = importr('grDevices')
# Open a 512x512 PNG device that writes to Rpy2Curve.png.
grdevices.png(file="Rpy2Curve.png", width=512, height=512)
# Evaluate the R expression that plots sin over [-2*pi, 2*pi]; its return
# value is kept in p.
p = ro.r('curve(sin, -2*pi, 2*pi)')
# plotting code here
# Close the graphics device opened above.
grdevices.dev_off()
from IPython.display import display
View clusterAnova.py
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.packages import importr
import numpy as np
import os
import pandas as pd
import rpy2
import rpy2.robjects as ro
wd = os.getcwd()