This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data_collections = tru.get_data_collections() | |
for dc in data_collections: | |
tru.set_data_collection(dc) | |
data_splits = tru.get_data_splits() | |
for split in data_splits: | |
tru.tester.add_performance_test( | |
data_split_name = split, | |
metric = 'AUC', | |
warn_if_less_than = 0.85, | |
fail_if_less_than = 0.80 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data_collections = tru.get_data_collections() | |
for dc in data_collections: | |
tru.set_data_collection(dc) | |
splits = tru.get_data_splits() | |
for split_key in splits: | |
split = f'{split_key}' | |
tru.tester.add_stability_test( | |
comparison_data_split_name = split, | |
base_data_split_name = 'train', | |
metric = 'DIFFERENCE_OF_MEAN', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import h5py | |
def load_data_for_year(year) -> pd.DataFrame: | |
if year > 2016: | |
filename = f'GFED4.1s_{year}_beta.hdf5' | |
else: | |
filename = f'GFED4.1s_{year}.hdf5' | |
filepath = os.path.join(data_dir, filename) | |
data = h5py.File(filepath, 'r') | |
df = pd.DataFrame({ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_feature_for_year_and_month(data_cache,year, month, suffix) -> pd.DataFrame: | |
if year not in data_cache: | |
data_cache[year] = load_data_for_year(year) | |
data = data_cache[year] | |
df = pd.DataFrame({ | |
f'burned_fraction_{suffix}': data['burned_area/{:02}/burned_fraction'.format(month)], | |
f'emissions_DM_{suffix}': data['emissions/{:02}/DM'.format(month)], | |
f'emissions_C_{suffix}': data['emissions/{:02}/C'.format(month)], | |
f'emissions_small_fire_fraction_{suffix}': data['emissions/{:02}/small_fire_fraction'.format(month)], | |
f'biosphere_NPP_{suffix}': data['biosphere/{:02}/NPP'.format(month)], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Render the rows of df_usa_2016 as a Mapbox scatter plot: one marker per
# row, coloured by "month" and sized by "burned_area".
import plotly.express as px

# Register the Mapbox token with plotly express before building the figure.
px.set_mapbox_access_token(mapbox_token)

fig = px.scatter_mapbox(
    # The 'year' column is dropped before the frame is handed to plotly.
    df_usa_2016.drop('year', axis=1),
    lat="lat",
    lon="lon",
    color="month",
    size="burned_area",
    size_max=15,
    zoom=10,
)
fig.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
models = {} | |
import time | |
for window_size in range(1,11): | |
time_start = time.time() | |
key = f'{window_size}year_window' | |
print(f'Training linear model for {key}') | |
models[f'linear_{key}'] = LogisticRegression(random_state=321, max_iter=1000, solver='saga').fit(data_train_x[key], data_train_y[key]>0.01) | |
n_est = 20 | |
print(f'Training gb{n_est} model for {key}') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
project_name = 'Fire_Party' | |
tru.set_environment('local') | |
tru.add_project(project_name, score_type='probits') | |
extra_data_columns = ['year'] | |
train_split_name = 'train' | |
burned_fraction_th = 0.01 | |
for window_size in range(1,11): | |
key = f'{window_size}year_window' | |
print(key) | |
tru.add_data_collection(key) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class target_encoder(BaseEstimator, TransformerMixin): | |
def __init__(self): | |
pass | |
def fit(self, X, y = None): | |
return self | |
def transform(self, X, y = None): | |
#target encode lat and long |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class target_encoder(BaseEstimator, TransformerMixin): | |
def __init__(self): | |
pass | |
def fit(self, X, y = None): | |
return self | |
def transform(self, X, y = None): | |
#target encode lat and long |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tru.set_data_collection("data_collection") | |
splits = tru.get_data_splits() | |
for split in splits: | |
tru.set_data_collection("data_collection") | |
tru.set_data_split(split) | |
xs = tru.get_xs() | |
ys = tru.get_ys() | |
tru.set_data_collection("data_collection_v2") | |
ys_mean = ys.mean() | |
ys_std = ys.std() |