This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd

# Load the Boston housing data set.
# NOTE(review): sklearn.datasets.load_boston was deprecated in scikit-learn 1.0
# and removed in 1.2; pin scikit-learn < 1.2 to run this as-is, or switch to a
# different data set (e.g. fetch_california_housing).
from sklearn.datasets import load_boston

boston = load_boston()

# Convert to a pandas DataFrame: stack the feature matrix and the target
# column side by side, then shuffle the rows (sample(frac=1) returns every
# row in random order).
boston_pd = pd.DataFrame(
    data=np.c_[boston['data'], boston['target']],
    columns=np.append(boston['feature_names'], 'target'),
).sample(frac=1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create one-hot feature encodings for the event and description fields.
# NOTE(review): assumes `plays_df` has already been loaded by an earlier
# step; confirm against the surrounding pipeline.
# BUG FIX: `ft` and `Feature` were used without being imported — import
# featuretools and qualify Feature as ft.Feature.
import featuretools as ft

es = ft.EntitySet(id="plays")
es = es.entity_from_dataframe(
    entity_id="plays",
    dataframe=plays_df,
    index="play_id",
    variable_types={
        "event": ft.variable_types.Categorical,
        "description": ft.variable_types.Categorical,
    },
)
f1 = ft.Feature(es["plays"]["event"])
f2 = ft.Feature(es["plays"]["description"])
# Keep only the 10 most frequent values per field (top_n=10); everything
# else is dropped from the encoding.
encoded, _ = ft.encode_features(plays_df, [f1, f2], top_n=10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import featuretools as ft

# Load the raw event data from the data warehouse.
rawEventsDF = ...  # load from data warehouse

# 1-hot encode the raw event data.
es = ft.EntitySet(id="events")
# BUG FIX: the original passed an undefined name (rawDataDF); the frame
# loaded above is rawEventsDF.
es = es.entity_from_dataframe(entity_id="events", dataframe=rawEventsDF)
# Depth-1 deep feature synthesis: direct transforms only, no stacked
# aggregations.
feature_matrix, defs = ft.dfs(entityset=es, target_entity="events", max_depth=1)
# Fit the categorical encoders; `encoders` can be reapplied to new data.
encodedDF, encoders = ft.encode_features(feature_matrix, defs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import featuretools as ft
from pyspark.sql.functions import pandas_udf, PandasUDFType

# Run feature generation on each group of the Spark DataFrame in parallel.
# `schema` (the output schema of the returned pandas DataFrame) must be
# defined before this decorator is evaluated.
@pandas_udf(schema, PandasUDFType.GROUPED_MAP)
def apply_feature_generation(pandasInputDF):
    # Build the EntitySet representation of this partition's events.
    es = ft.EntitySet(id="events")
    es = es.entity_from_dataframe(entity_id="events", dataframe=pandasInputDF)
    # Derive a "users" entity keyed on user_id so dfs can aggregate per user.
    es = es.normalize_entity(base_entity_id="events", new_entity_id="users", index="user_id")
    # NOTE(review): snippet appears truncated here — a GROUPED_MAP pandas UDF
    # must return a pandas DataFrame matching `schema` (e.g. the result of
    # ft.dfs / ft.calculate_feature_matrix). TODO: restore the missing return.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(CausalImpact)

# Load the daily session counts for the test and control groups.
data <- read.csv(file = "DailySessions.csv")

# Combine the two series into a matrix and plot the input data.
ts <- cbind(data$test, data$control)
matplot(ts, type = "l")

# Use two-week prior and post periods (days 1-14 vs 15-30).
# NOTE(review): snippet stops before the CausalImpact(ts, pre.period,
# post.period) call that consumes these; see the rest of the pipeline.
pre.period <- c(1, 14)
post.period <- c(15, 30)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(boot)

data <- read.csv("UserSessions.csv")

# Statistic for boot(): the percent difference-of-differences between the
# test and control groups' post/prior session ratios, computed on the
# bootstrap resample selected by `indices`.
# BUG FIX: the original filtered the Test rows on d$group but the Control
# rows on d$expgroup; both branches must use the same group column.
# Assumes the column is named `expgroup` — TODO confirm against the CSV.
run_DiD <- function(data, indices) {
  d <- data[indices, ]
  # Post/prior ratio for each group on this resample.
  new <- mean(d$postval[d$expgroup == 'Test']) / mean(d$priorval[d$expgroup == 'Test'])
  old <- mean(d$postval[d$expgroup == 'Control']) / mean(d$priorval[d$expgroup == 'Control'])
  # Relative lift of the test ratio over the control ratio, in percent.
  return((new - old) / old * 100.0)
}
NewerOlder