Created
January 20, 2018 05:24
-
-
Save pangyuteng/c263290d5b640de31c8ae2496b066d2c to your computer and use it in GitHub Desktop.
Starter code for Numerai with Keras
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
os.environ['CUDA_VISIBLE_DEVICES'] = '' | |
import pandas as pd | |
import numpy as np | |
from keras import optimizers | |
from keras.utils import to_categorical | |
from keras.models import Model | |
from keras.layers import Input, Dense | |
from keras.layers.normalization import BatchNormalization | |
from keras.layers.core import Dropout, Activation | |
def _build_model(n_features, drop_out):
    """Build the two-class softmax classifier used by ``main``.

    Architecture: Dense(50) -> BatchNormalization -> ReLU -> Dropout ->
    Dense(2) -> softmax.

    Args:
        n_features: Number of input columns fed to the network.
        drop_out: Dropout rate applied after the hidden activation.

    Returns:
        An uncompiled ``keras.models.Model``.
    """
    m_in = Input(shape=(n_features,))
    m = Dense(50)(m_in)
    m = BatchNormalization(axis=-1)(m)
    m = Activation('relu')(m)
    m = Dropout(drop_out)(m)
    m = Dense(2)(m)
    m_out = Activation('softmax')(m)
    return Model(inputs=m_in, outputs=m_out)


def main():
    """Train a small Keras classifier on Numerai data and write predictions.

    Reads ``numerai_training_data.csv`` and ``numerai_tournament_data.csv``
    from the current directory, fits the model on every column whose name
    contains "feature", and writes ``predictions.csv`` with the columns
    ``id`` and ``probability``.

    Raises:
        ValueError: If the training data has no column containing "feature".
    """
    # Set seed for reproducibility (seeds NumPy's global RNG).
    np.random.seed(0)

    print("Loading data...")
    # Load the data from the CSV files
    training_data = pd.read_csv('numerai_training_data.csv', header=0)
    prediction_data = pd.read_csv('numerai_tournament_data.csv', header=0)

    # Transform the loaded CSV data into numpy arrays
    features = [f for f in list(training_data) if "feature" in f]
    if not features:
        raise ValueError(
            "no feature columns found in numerai_training_data.csv")

    X = training_data[features].values
    # One-hot encode the binary target so it matches the 2-unit softmax output.
    Y = to_categorical(training_data["target"], num_classes=2)
    # Same column order as X; converted to an array for consistency with X.
    x_prediction = prediction_data[features].values
    ids = prediction_data["id"]

    # -----
    batch_size = 64
    drop_out = 0.3

    # Input width follows the number of feature columns actually present
    # instead of being hard-coded, so the script does not break when the
    # dataset's feature count differs from 50.
    model = _build_model(len(features), drop_out)

    opt = optimizers.SGD(lr=0.001, clipnorm=0.9)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    model.fit(X, Y, batch_size=batch_size, epochs=20, validation_split=0.3)

    y_prediction = model.predict(x_prediction)
    # Column 1 of the softmax output is the probability of target == 1.
    results = y_prediction[:, 1]

    # -----
    # Pair ids and probabilities by position rather than by index alignment.
    joined = pd.DataFrame({'id': ids.values, 'probability': results},
                          columns=['id', 'probability'])

    print("Writing predictions to predictions.csv")
    # Save the predictions out to a CSV file
    joined.to_csv("predictions.csv", index=False)
    # Now you can upload these predictions on numer.ai
# Script entry point: run the full train/predict pipeline only when this
# file is executed directly, not when it is imported.
if '__main__' == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment