# Ranner198/trillapp.py

## trillapp.py
# -*- coding: utf-8 -*-
"""TrillApp.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1b0qU_SkDafVEvhYmLhRgYzO_B-cu3KwQ
"""

# Tasks!
## Look at the Data
## Graph some portion of it
## Decide what portions we want to predict
## Research what kinds of prediction options we have.
## Pick two and see which one seems to have the best results immediately.
## Work on perfecting the solution
# Download link for the CSV export that pd.read_csv() fetches below.
src = 'https://data.nasa.gov/api/views/mc52-syum/rows.csv?accessType=DOWNLOAD'

# Space Apps Challenge
# Lost Data Chasers
import pandas as pd
import numpy as np

# Load the CSV export straight from its URL into a DataFrame.
data = pd.read_csv(src)

print(data.columns)

# Copy of the table without any row that contains a missing value.
cleanData = data.dropna()

# Temporarily lift pandas' row/column display limits so the whole table is
# printed rather than a truncated preview.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
):
    print(data)

import requests

# Query the JPL fireball API (at most 2000 records, velocity components
# included, per the query string).
# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() reports an HTTP error here instead of letting it
# surface later as a confusing JSON-decoding or KeyError failure.
response = requests.get(
    'https://ssd-api.jpl.nasa.gov/fireball.api?limit=2000&vel-comp=true',
    timeout=30,
)
response.raise_for_status()

x = response.json()
print(x.keys())

# Label the columns with the API-provided field names at construction time;
# unlike assigning `df.columns` afterwards, this also works when the payload
# contains no rows.
df = pd.DataFrame(x['data'], columns=x['fields'], dtype=float)

print(x['fields'])
print(x['count'])
print(x['signature'])

# Preview the first 40 rows without row/column truncation.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df[:40])

# Work on an explicit copy of the NaN-free rows: assigning columns directly on
# the object returned by dropna() can trigger pandas' SettingWithCopyWarning.
df_na_free = df.dropna().copy()
print(df_na_free[:6]['vel'].to_list())

# Make sure both plotted columns are numeric before drawing them.
df_na_free['vel'] = pd.to_numeric(df_na_free['vel'])
df_na_free['impact-e'] = pd.to_numeric(df_na_free['impact-e'])

# Scatter of 'vel' (y axis) against 'impact-e' (x axis).
df_na_free.plot(x='impact-e', y='vel', kind='scatter')

# 3D Scatter
import matplotlib.pyplot as plt
# Import kept: older Matplotlib releases need it to register the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
# Create the 3D axes through the figure. Recent Matplotlib releases no longer
# attach an `Axes3D(fig)` instance to the figure automatically, which leaves
# the rendered figure empty.
ax = fig.add_subplot(111, projection='3d')

# Rows of the API table without missing values. `cleanDF` used to be a second,
# identical dropna() result; it is kept as an alias so the name stays defined.
df_clean = df.dropna()
cleanDF = df_clean

# One point per complete row, positioned by its three velocity-component columns.
ax.scatter(df_clean['vx'], df_clean['vy'], df_clean['vz'])
# The y-axis label doubles as a counter of how many rows were plotted.
ax.set_ylabel('Datapoints: ' + str(df_clean.shape[0]))
plt.show()

# Another NaN-free copy of the API table.
df_print_all = df.dropna()

# Show the first six 'vel' values again, then redraw 'vel' against 'impact-e'.
print(df_na_free.head(6)['vel'].tolist())
df_na_free.plot.scatter(x='impact-e', y='vel')

"""# Machine Learning"""

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

# Start from the API table minus its 'date' column.
temp = df.drop(columns=['date'])

# Replace the direction letters with signed unit values (N/E -> 1, S/W -> -1).
# The masks are computed on the untouched `df`, so the order of these writes
# does not matter.
temp.loc[df['lat-dir'].eq('N'), 'lat-dir'] = 1
temp.loc[df['lat-dir'].eq('S'), 'lat-dir'] = -1
temp.loc[df['lon-dir'].eq('W'), 'lon-dir'] = -1
temp.loc[df['lon-dir'].eq('E'), 'lon-dir'] = 1

# Features and targets are currently the very same frame and column set.
# NOTE(review): because X and y are identical, ML holds every column twice and
# a model trained on it is asked to predict its own inputs - confirm which
# columns are actually meant to be the targets.
X = y = temp
icols = jcols = temp.columns

ML = pd.concat(
    [
        pd.DataFrame(X, columns=icols),
        pd.DataFrame(y, columns=jcols),
    ],
    axis=1,
)

# Notebook-style preview; has no visible effect when run as a plain script.
ML.head()

from warnings import simplefilter

import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor

# Keep only the rows that have a value in every column.
df_notnans = ML.dropna()

# Silence FutureWarning messages for the remainder of the run.
simplefilter(action='ignore', category=FutureWarning)

# 81% of the complete rows go to training, the rest are held out; the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df_notnans[icols],
    df_notnans[jcols],
    train_size=0.81,
    random_state=4,
)

max_depth = 30

# Candidate 1: a random forest wrapped in the multi-output meta estimator.
regr_multirf = MultiOutputRegressor(
    RandomForestRegressor(max_depth=max_depth, random_state=0)
)
regr_multirf.fit(X_train, y_train)

# Candidate 2: a single random forest fitted on all targets directly.
regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2).fit(X_train, y_train)

# Predictions of both candidates on the held-out rows.
y_multirf = regr_multirf.predict(X_test)
y_rf = regr_rf.predict(X_test)

# Report the meta estimator's score() on the held-out rows, scaled by 100.
scores = regr_multirf.score(X_test, y_test)
print("The prediction score on the test data is {:.2f}%".format(scores*100))

plt.figure()
s = 50   # marker size shared by all three series
a = 0.4  # marker transparency shared by all three series

# Score each model up front so the legend labels read cleanly below.
multirf_label = "Multi RF score=%.2f" % regr_multirf.score(X_test, y_test)
rf_label = "RF score=%.2f" % regr_rf.score(X_test, y_test)

# Plot the first two target columns: held-out values first, then each
# model's predictions for the same rows.
plt.scatter(
    y_test.iloc[:, 0], y_test.iloc[:, 1],
    c="navy", s=s, marker="s", alpha=a, label="Data",
)
plt.scatter(
    y_multirf[:, 0], y_multirf[:, 1],
    c="cornflowerblue", s=s, alpha=a, label=multirf_label,
)
plt.scatter(
    y_rf[:, 0], y_rf[:, 1],
    c="c", s=s, marker="^", alpha=a, label=rf_label,
)

# Fixed viewing window on both axes.
plt.xlim(-6, 6)
plt.ylim(-6, 6)

plt.xlabel("X Testing", color='white')
plt.ylabel("Y Testing", color='white')
plt.title("Comparing random forests and the multi-output meta estimator")
plt.legend()
plt.show()
	# -*- coding: utf-8 -*-
	"""TrillApp.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1b0qU_SkDafVEvhYmLhRgYzO_B-cu3KwQ
	"""

	# Tasks!
	## Look at the Data
	## Graph some portion of it
	## Decide what portions we want to predict
	## Research what kinds of prediction options we have.
	## Pick two and see which one seems to have the best results immediately.
	## Work on perfecting the solution
	# NOTE(review): this tab-indented section repeats the data-loading statements
	# found near the start of the file. As written, the leading tabs and the
	# un-indented body of the `with` statement below do not form valid Python -
	# confirm whether this duplicate is meant to be kept.
	# Download link for the CSV export that pd.read_csv() fetches below.
	src = 'https://data.nasa.gov/api/views/mc52-syum/rows.csv?accessType=DOWNLOAD'

	# Space Apps Challenge
	# Lost Data Chasers
	import pandas as pd
	import numpy as np

	# Read data into variable
	data = pd.read_csv(src)

	# Copy of the table without any row that contains a missing value.
	cleanData = data.dropna()

	print(data.columns)

	# Lift pandas' row/column display limits while printing the full table.
	with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also
	print(data)

	# NOTE(review): tab-indented repeat of the API-fetch and scatter-plot
	# statements that appear earlier in the file; the `with` body below is not
	# indented, so this section does not parse as written.
	import requests
	# Fetch the fireball records (query string asks for at most 2000 rows with
	# velocity components).
	response = requests.get('https://ssd-api.jpl.nasa.gov/fireball.api?limit=2000&vel-comp=true')
	# from pandas.io.json import json_normalize
	# print(response.json()['data'])
	x = response.json()
	print(x.keys())
	# Build the table from the 'data' rows, then label the columns with the
	# API-provided 'fields' names.
	df = pd.DataFrame(x['data'], dtype=float)
	# df = pd.io.json.json_normalize(response.json)
	# df = pd.read_json()
	df.columns= x['fields']
	# print(df)
	print(x['fields'])
	print(x['count'])
	print(x['signature'])
	# Preview the first 40 rows without row/column truncation.
	with pd.option_context('display.max_rows', None, 'display.max_columns', None):
	print(df[:40]) #.dropna()

	# Rows without missing values; 'vel' and 'impact-e' are converted to numeric
	# before being drawn as a scatter plot.
	df_na_free = df.dropna()
	print(df_na_free[:6]['vel'].to_list())
	df_na_free['vel'] = pd.to_numeric(df_na_free['vel'])
	df_na_free['impact-e'] = pd.to_numeric(df_na_free['impact-e'])
	df_na_free.plot(x='impact-e',y='vel',kind='scatter')

	# NOTE(review): tab-indented repeat of the 3D-scatter statements that appear
	# earlier in the file - confirm whether this duplicate is meant to be kept.
	# 3D Scatter
	import matplotlib.pyplot as plt
	from mpl_toolkits.mplot3d import Axes3D
	fig = plt.figure()
	ax = Axes3D(fig)

	# Two separate NaN-free copies of the API table; only `df_clean` is used below.
	cleanDF = df.dropna()


	df_clean = df.dropna()

	# One point per complete row, positioned by the 'vx', 'vy' and 'vz' columns.
	ax.scatter(df_clean['vx'], df_clean['vy'], df_clean['vz'])
	# The y-axis label doubles as a counter of how many rows were plotted.
	ax.set_ylabel('Datapoints: ' + str(df_clean.shape[0]))
	plt.show()

	# Another NaN-free copy, followed by a repeat of the 'vel' print-out and the
	# 'impact-e' / 'vel' scatter plot.
	df_print_all = df.dropna()
	print(df_na_free[:6]['vel'].to_list())
	df_na_free.plot(x='impact-e',y='vel',kind='scatter')

	"""# Machine Learning"""

	# NOTE(review): tab-indented repeat of the ML-preparation statements that
	# appear earlier in the file - confirm whether this duplicate is meant to be kept.
	from sklearn.datasets import make_regression
	from sklearn.linear_model import LinearRegression

	# Work on a copy of the API table with the 'date' column removed.
	temp = df.copy()
	temp.drop(['date'], axis = 1, inplace = True)

	# Replace the direction letters with signed unit values (S/W -> -1, N/E -> 1).
	temp.loc[df['lat-dir'] == 'S', 'lat-dir'] = -1
	temp.loc[df['lat-dir'] == 'N', 'lat-dir'] = 1
	temp.loc[df['lon-dir'] == 'E', 'lon-dir'] = 1
	temp.loc[df['lon-dir'] == 'W', 'lon-dir'] = -1

	#X,y = make_regression(n_samples=len(df.dropna()), n_features=9, n_informative=3, n_targets=9, tail_strength=0.5, noise=0.02, shuffle=False, coef=False, random_state=0)

	# Features and targets are the very same frame.
	# NOTE(review): with X and y identical, ML holds every column twice and a
	# model trained on it predicts its own inputs - confirm the intended targets.
	X = temp
	y = temp

	icols = temp.columns
	jcols = icols
	ML = pd.concat([pd.DataFrame(X, columns=icols), pd.DataFrame(y, columns=jcols)], axis=1)

	# Notebook-style preview; has no visible effect when run as a plain script.
	ML.head()

	# NOTE(review): tab-indented repeat of the training and plotting statements
	# that appear earlier in the file - confirm whether this duplicate is meant
	# to be kept.
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.multioutput import MultiOutputRegressor
	from sklearn.model_selection import train_test_split
	import matplotlib.pyplot as plt
	# Keep only the rows that have a value in every column.
	df_notnans = ML.dropna()

	# Silence FutureWarning messages from here on.
	from warnings import simplefilter
	simplefilter(action='ignore', category=FutureWarning)

	# 81% of the complete rows go to training; the fixed random_state makes the
	# split repeatable.
	X_train, X_test, y_train, y_test = train_test_split(df_notnans[icols], df_notnans[jcols], train_size = 0.81, random_state=4)

	max_depth = 30
	# Candidate 1: a random forest wrapped in the multi-output meta estimator.
	regr_multirf = MultiOutputRegressor(RandomForestRegressor(max_depth=max_depth,
	random_state=0))

	regr_multirf.fit(X_train, y_train)
	# Candidate 2: a single random forest fitted on all targets directly.
	regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)
	regr_rf.fit(X_train, y_train)

	# Predict on new data
	y_multirf = regr_multirf.predict(X_test)
	y_rf = regr_rf.predict(X_test)

	# Check the prediction score
	scores = regr_multirf.score(X_test, y_test)
	print("The prediction score on the test data is {:.2f}%".format(scores*100))

	plt.figure()
	# Marker size and transparency shared by all three series.
	s = 50
	a = 0.4
	# First two target columns: held-out values, then each model's predictions.
	plt.scatter(y_test.iloc[:, 0], y_test.iloc[:, 1],
	c="navy", s=s, marker="s", alpha=a, label="Data")
	plt.scatter(y_multirf[:, 0], y_multirf[:, 1],
	c="cornflowerblue", s=s, alpha=a,
	label="Multi RF score=%.2f" % regr_multirf.score(X_test, y_test))
	plt.scatter(y_rf[:, 0], y_rf[:, 1],
	c="c", s=s, marker="^", alpha=a,
	label="RF score=%.2f" % regr_rf.score(X_test, y_test))
	# Fixed viewing window on both axes.
	plt.xlim([-6, 6])
	plt.ylim([-6, 6])

	plt.xlabel("X Testing", color='white')
	plt.ylabel("Y Testing", color='white')
	plt.title("Comparing random forests and the multi-output meta estimator")
	plt.legend()
	plt.show()