Created
October 3, 2020 20:21
-
-
Save gvyshnya/7644fd77567051203ad96d95fbc7ef2a to your computer and use it in GitHub Desktop.
AutoViz Issues
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np # linear algebra | |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) | |
import datetime as dt | |
from typing import Tuple | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import plotly.graph_objects as go | |
from plotly.subplots import make_subplots | |
import plotly.express as px | |
import plotly.offline | |
# read data | |
in_kaggle = False | |
# base report output path | |
reports_folder = 'reports/' | |
def get_data_file_path(is_in_kaggle: bool) -> Tuple[str, str, str, str, str]: | |
train_path = '' | |
test_path = '' | |
if is_in_kaggle: | |
# running in Kaggle, inside the competition | |
train_path = '../input/lish-moa/train_features.csv' | |
train_targets_path = '../input/lish-moa/train_targets_scored.csv' | |
train_targets_nonscored_path = '../input/lish-moa/train_targets_nonscored.csv' | |
test_path = '../input/lish-moa/test_features.csv' | |
sample_submission_path = '../input/lish-moa/sample_submission.csv' | |
else: | |
# running locally | |
train_path = 'data/train_features.csv' | |
train_targets_path = 'data/train_targets_scored.csv' | |
train_targets_nonscored_path = 'data/train_targets_nonscored.csv' | |
test_path = 'data/test_features.csv' | |
sample_submission_path = 'data/sample_submission.csv' | |
return train_path, train_targets_path, train_targets_nonscored_path, test_path, sample_submission_path | |
# Import data | |
train_set_path, train_set_targets_path, train_set_targets_nonscored_path, test_set_path, sample_subm_path = get_data_file_path(in_kaggle) | |
a = pd.read_csv(train_set_path) | |
b = pd.read_csv(test_set_path) | |
c = pd.read_csv(train_set_targets_nonscored_path) | |
d = pd.read_csv(train_set_targets_path) | |
merged = pd.concat([a,b]) | |
# Datasets for treated and control experiments | |
treated = a[a['cp_type'] == 'trt_cp'] | |
control = a[a['cp_type'] == 'ctl_vehicle'] | |
# Treatment time datasets | |
cp24 = a[a['cp_time']== 24] | |
cp48 = a[a['cp_time']== 48] | |
cp72 = a[a['cp_time']== 72] | |
# Merge scored and nonscored labels | |
all_drugs = pd.merge(d, c, on='sig_id', how='inner') | |
# Treated drugs without control | |
treated_list = treated['sig_id'].to_list() | |
drugs_tr = d[d['sig_id'].isin(treated_list)] | |
# Non-treated control observations | |
control_list = control['sig_id'].to_list() | |
drugs_cntr = d[d['sig_id'].isin(control_list)] | |
# Treated drugs: | |
nonscored = c[c['sig_id'].isin(treated_list)] | |
scored = d[d['sig_id'].isin(treated_list)] | |
# adt = All Drugs Treated | |
adt = all_drugs[all_drugs['sig_id'].isin(treated_list)] | |
# Select the columns c- | |
c_cols = [col for col in a.columns if 'c-' in col] | |
# Filter the columns c- | |
cells_tr = treated[c_cols] | |
cells_cntr = control[c_cols] | |
# Select the columns g- | |
g_cols = [col for col in a.columns if 'g-' in col] | |
# Filter the columns g- | |
genes_tr = treated[g_cols] | |
genes_cntr = control[g_cols] | |
from autoviz.AutoViz_Class import AutoViz_Class | |
AV = AutoViz_Class() | |
dft = AV.AutoViz(filename='', sep='' , depVar='cp_time', dfte=treated, header=0, verbose=2, lowess=False, | |
chart_format='svg', max_rows_analyzed=2500, max_cols_analyzed=40) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment