Skip to content

Instantly share code, notes, and snippets.

@Createdd
Created September 26, 2020 14:53
Show Gist options
  • Save Createdd/23e521c13ca5a3fec49752966cf3b1dc to your computer and use it in GitHub Desktop.
Save Createdd/23e521c13ca5a3fec49752966cf3b1dc to your computer and use it in GitHub Desktop.
def pre_process(df):
    """Clean the COVID data frame and split it into features and target.

    Steps, in order:

    1. Drop a fixed list of columns flagged as having too many missing
       values.
    2. For every ``object`` column, replace missing values with the
       string ``'MISSING'`` and encode the column with the module-level
       ``encoder``.
    3. For every ``float64`` column, replace missing values with that
       column's mean.
    4. Separate ``new_cases`` (the target) from the remaining columns.

    All work happens on the frame returned by ``drop``, so the frame
    passed in by the caller is left untouched.

    Args:
        df: pandas DataFrame that contains every column in the drop list
            below as well as ``new_cases``.

    Returns:
        A tuple ``(X, y)``: ``X`` is the processed frame without
        ``new_cases``; ``y`` is the processed ``new_cases`` column.

    Raises:
        KeyError: if one of the columns to drop, or ``new_cases``, is
            not present in ``df``.

    Note:
        ``encoder`` is defined outside this function (presumably a
        scikit-learn ``LabelEncoder`` -- confirm at its definition). It
        is re-fitted for each object column, so after this call it holds
        the fit of the last object column processed.
    """
    cols_too_many_missing = [
        'new_tests',
        'new_tests_per_thousand',
        'total_tests_per_thousand',
        'total_tests',
        'tests_per_case',
        'positive_rate',
        'new_tests_smoothed',
        'new_tests_smoothed_per_thousand',
        'tests_units',
        'handwashing_facilities',
    ]
    df = df.drop(columns=cols_too_many_missing)

    nominal_cols = df.select_dtypes(include=['object']).columns.tolist()
    for col in nominal_cols:
        if df[col].isna().any():
            # Assign the result back instead of calling fillna(inplace=True)
            # on the selected column: the chained in-place form does not
            # update the frame when pandas Copy-on-Write is active.
            df[col] = df[col].fillna('MISSING')
        df[col] = encoder.fit_transform(df[col])

    # Selected after encoding, exactly as before, so only columns that are
    # float64 at this point are mean-imputed.
    numerical_cols = df.select_dtypes(include=['float64']).columns.tolist()
    for col in numerical_cols:
        df[col] = df[col].fillna(df[col].mean())

    X = df.drop(columns=['new_cases'])
    y = df['new_cases']
    return X, y
def get_prediction_params(input_val, url_to_covid):
    """Build the single-row feature array for one location.

    Reads the CSV, runs it through ``pre_process`` and returns the last
    processed row whose ``location`` equals ``input_val``.

    Args:
        input_val: location name exactly as it appears in the
            ``location`` column of the CSV.
        url_to_covid: path, URL or file-like object accepted by
            ``pandas.read_csv``.

    Returns:
        A 2-D array of shape ``(1, n_features)`` holding the processed
        feature values of the last row for that location (presumably the
        most recent record if the file is in chronological order --
        confirm against the data source).

    Raises:
        ValueError: if ``input_val`` does not occur in the ``location``
            column.
    """
    df_orig = pd.read_csv(url_to_covid)

    # Pick the rows by raw location name *before* encoding. This avoids
    # fitting the encoder a second time and relying on both fits assigning
    # the same integer label to the location.
    is_location = df_orig['location'] == input_val
    if not is_location.any():
        raise ValueError(f"{input_val!r} is not a location in the data set")

    # pre_process only drops columns and fills/encodes values, so the row
    # labels of X line up with the mask computed on df_orig.
    X, _ = pre_process(df_orig)
    to_pred = X.loc[is_location].iloc[-1].values.reshape(1, -1)
    return to_pred
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment