TrillSpaceApp
# -*- coding: utf-8 -*-
"""TrillApp.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1b0qU_SkDafVEvhYmLhRgYzO_B-cu3KwQ
"""
# Tasks!
## Look at the Data
## Graph some portion of it
## Decide what portions we want to predict
## Research what kinds of prediction options we have.
## Pick two and see which one seems to have the best results immediately.
## Work on perfecting the solution
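# Source data: a CSV export from data.nasa.gov used as the starting dataset for the challenge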
src = 'https://data.nasa.gov/api/views/mc52-syum/rows.csv?accessType=DOWNLOAD'
# Space Apps Challenge
# Lost Data Chasers
import pandas as pd
import numpy as np
# Read data into variable
data = pd.read_csv(src)
cleanData = data.dropna()
print(data.columns)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(data)
import requests
response = requests.get('https://ssd-api.jpl.nasa.gov/fireball.api?limit=2000&vel-comp=true')
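# JPL SSD fireball API: limit=2000 caps the rows returned; vel-comp=true adds the velocity components (vx, vy, vz)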
# from pandas.io.json import json_normalize
# print(response.json()['data'])
x = response.json()
print(x.keys())
df = pd.DataFrame(x['data'], dtype=float)
# df = pd.io.json.json_normalize(response.json)
# df = pd.read_json()
df.columns = x['fields']  # column names come separately in the 'fields' key of the response
# print(df)
print(x['fields'])
print(x['count'])
print(x['signature'])
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df[:40])  # .dropna()
df_na_free = df.dropna().copy()  # copy so the numeric conversions below don't trigger SettingWithCopyWarning
print(df_na_free[:6]['vel'].to_list())
df_na_free['vel'] = pd.to_numeric(df_na_free['vel'])
df_na_free['impact-e'] = pd.to_numeric(df_na_free['impact-e'])
df_na_free.plot(x='impact-e', y='vel', kind='scatter')
# 3D Scatter
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = Axes3D(fig)
df_clean = df.dropna()
ax.scatter(df_clean['vx'], df_clean['vy'], df_clean['vz'])
ax.set_ylabel('Datapoints: ' + str(df_clean.shape[0]))
plt.show()
"""# Machine Learning"""
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
temp = df.copy()
temp.drop(['date'], axis=1, inplace=True)
# Encode the hemisphere letters as signed numeric flags
temp.loc[df['lat-dir'] == 'S', 'lat-dir'] = -1
temp.loc[df['lat-dir'] == 'N', 'lat-dir'] = 1
temp.loc[df['lon-dir'] == 'E', 'lon-dir'] = 1
temp.loc[df['lon-dir'] == 'W', 'lon-dir'] = -1
#X,y = make_regression(n_samples=len(df.dropna()), n_features=9, n_informative=3, n_targets=9, tail_strength=0.5, noise=0.02, shuffle=False, coef=False, random_state=0)
# Inputs and targets are the same feature set here, so the regressors learn to reconstruct the table
X = temp
y = temp
icols = temp.columns
jcols = icols
ML = pd.concat([pd.DataFrame(X, columns=icols), pd.DataFrame(y, columns=jcols)], axis=1)
ML.head()
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
df_notnans = ML.dropna()
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
X_train, X_test, y_train, y_test = train_test_split(df_notnans[icols], df_notnans[jcols], train_size=0.81, random_state=4)
max_depth = 30
regr_multirf = MultiOutputRegressor(RandomForestRegressor(max_depth=max_depth,
                                                           random_state=0))
regr_multirf.fit(X_train, y_train)
regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)
regr_rf.fit(X_train, y_train)
# Predict on new data
y_multirf = regr_multirf.predict(X_test)
y_rf = regr_rf.predict(X_test)
# Check the prediction score
scores = regr_multirf.score(X_test, y_test)
print("The prediction score on the test data is {:.2f}%".format(scores*100))
plt.figure()
s = 50
a = 0.4
plt.scatter(y_test.iloc[:, 0], y_test.iloc[:, 1],
            c="navy", s=s, marker="s", alpha=a, label="Data")
plt.scatter(y_multirf[:, 0], y_multirf[:, 1],
            c="cornflowerblue", s=s, alpha=a,
            label="Multi RF score=%.2f" % regr_multirf.score(X_test, y_test))
plt.scatter(y_rf[:, 0], y_rf[:, 1],
            c="c", s=s, marker="^", alpha=a,
            label="RF score=%.2f" % regr_rf.score(X_test, y_test))
plt.xlim([-6, 6])
plt.ylim([-6, 6])
plt.xlabel("X Testing", color='white')
plt.ylabel("Y Testing", color='white')
plt.title("Comparing random forests and the multi-output meta estimator")
plt.legend()
plt.show()