Created
June 8, 2017 00:46
-
-
Save m-ueno/9f398525071432b841c325694d75836a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# ### データ分析の目的
#
# * 最高速度を上げたい, 平均速度を上げたい
# * 速度を上げるために、速度と因果関係にある「操作」が何か知りたい
# * IoTセンサーから得られる値を用いたサービス開発/デバッグの課題抽出
#
# ### データから確かめられそうなこと
#
# * speedと, pitch角・roll角の間に相関があるかないか(横井さん仮説)
#   * => 9軸センサー値からpitch, rollを出す勉強が必要. もうちょっと簡単なところから
# * speedと, gyroの差分の大きさ(ブレの大きさ)に相関があるかないか
#   * => MOTIONデータのウィンドウ内のmedian と、そのときのspeeds
#     * timestampフィールドを補完する
#     * ある時間window内の角速度medianと、そのときの速度をだす
#     * windowをいくつか変えて相関の有無をそれぞれだす [未]
# In[1]: | |
import csv | |
import re | |
from io import StringIO | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from IPython.display import display as d | |
import matplotlib.cm as cm | |
get_ipython().magic('matplotlib inline') | |
# In[107]: | |
# In[124]: | |
def load_from_csv(csvpath, data_type='GPRMC', col_names=None):
    """Load the records of one sentence type from a sensor log file.

    The file is read as cp932 text.  Only lines starting with
    ``'$' + data_type`` are kept; they are parsed as header-less CSV and
    only the first ten columns (0-9) are loaded.  Malformed rows are
    skipped instead of aborting the load.

    Args:
        csvpath: Path of the log file.
        data_type: Sentence name without the leading ``$``.  An empty
            string keeps every line that starts with ``$``.
        col_names: Unused; kept so existing callers keep working.

    Returns:
        pandas.DataFrame with integer column labels 0-9.
    """
    prefix = '${}'.format(data_type)
    buf = StringIO()
    with open(csvpath, encoding='cp932') as f:
        for line in f:
            if not line.startswith(prefix):
                continue
            buf.write(line)
            # Debug aid: report lines that contain a literal backslash-r
            # sequence (the raw string is two characters, not a carriage
            # return).
            if r'\r' in line:
                print(line)
    buf.seek(0)

    read_options = dict(header=None, low_memory=False, usecols=range(10))
    try:
        # ``on_bad_lines`` replaced ``error_bad_lines`` in newer pandas.
        return pd.read_csv(buf, on_bad_lines='skip', **read_options)
    except TypeError:
        # Older pandas releases only know the previous keyword.
        buf.seek(0)
        return pd.read_csv(buf, error_bad_lines=False, **read_options)
def load_all_from_csv(csvpath):
    """Load every ``$``-prefixed record of the log at *csvpath*.

    Delegates to ``load_from_csv`` with an empty ``data_type``, so the
    line filter reduces to "starts with ``$``".
    """
    return load_from_csv(csvpath, data_type='')
def parse_gps_from_df_all(df_all):
    """Extract the ``$GPRMC`` rows from the combined table and name the columns.

    Args:
        df_all: DataFrame with integer column labels whose column 0 holds
            the sentence type (as produced by ``load_all_from_csv``).

    Returns:
        A new DataFrame containing only ``$GPRMC`` rows whose ``warning``
        field equals ``'A'``, with descriptive column names, all-NaN
        columns dropped, and every column that can be parsed as numbers
        converted to a numeric dtype.
    """
    def to_numeric_if_possible(column):
        # Same result as the deprecated ``errors='ignore'`` mode: columns
        # that cannot be parsed are returned unchanged.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    df = df_all[df_all[0] == '$GPRMC']
    df = df.rename(columns={
        0: 'datatype',
        1: 'time',
        2: 'warning',
        3: 'lat',
        4: '##',
        5: 'lng',
        6: '#',
        7: 'speed',
        8: 'cource',
        9: 'date',
        10: 'magnetic_variation',
        11: '###',
        12: 'mandatory_checksum',
    })
    df = df.dropna(axis=1, how='all')
    # NOTE(review): presumably 'A' marks a valid fix in the GPRMC status
    # field -- confirm against the device documentation.
    df = df[df.warning == 'A']
    return df.apply(to_numeric_if_possible)
def parse_motion_from_df_all(df_all):
    """Extract the ``$MOTION`` rows from the combined table and name the columns.

    Args:
        df_all: DataFrame with integer column labels whose column 0 holds
            the sentence type (as produced by ``load_all_from_csv``).

    Returns:
        A new DataFrame containing only ``$MOTION`` rows, with descriptive
        column names, all-NaN columns dropped, and every column that can
        be parsed as numbers converted to a numeric dtype.
    """
    def to_numeric_if_possible(column):
        # Same result as the deprecated ``errors='ignore'`` mode: columns
        # that cannot be parsed are returned unchanged.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    df_motion = df_all[df_all[0] == '$MOTION']
    df_motion = df_motion.rename(columns={
        0: 'datatype',
        1: '#',
        2: 'id',
        3: 'delta',
        4: 'ax',
        5: 'ay',
        6: 'az',
        7: 'gyro_x',
        8: 'gyro_y',
        9: 'gyro_z',
        10: 'mx',
        11: 'my',
        12: 'mz',
        13: 'barometer'
    })
    df_motion = df_motion.dropna(axis=1, how='all')
    return df_motion.apply(to_numeric_if_possible)
# Input log for this notebook.  Paths that were used earlier:
#   cleancsv_path = '17041602_clean.csv'
#   csv_path = 'windhack-csv-20170601/17041602.CSV'  # speed values are way off in this file
csv_path = 'windhack-csv-20170601/17041602_01.CSV'

# Load every '$' record once, then split it into the GPS and motion tables.
df_all = load_all_from_csv(csv_path)
df_gps = parse_gps_from_df_all(df_all)
df_motion = parse_motion_from_df_all(df_all)
print('done')
# ## GPSの軌跡 | |
# In[125]: | |
def show_gps():
    """Draw the GPS track as a scatter plot coloured by speed.

    Reads the module-level ``df_gps`` table: ``lat`` goes on the x axis,
    ``lng`` on the y axis, and ``speed`` selects the colour from the
    ``hot`` colormap.
    """
    plt.scatter(df_gps.lat, df_gps.lng, c=df_gps.speed, cmap=plt.cm.hot)


show_gps()
# ## スピード
#
# 縦軸がスピード。単位はknot. 1 knot≒1.85km/h
#
# だいたい5ノットで航行. 約9km/h? 遅すぎる?
#
# 15ノットで約28km/h.
# In[126]: | |
def show_speed():
    """Line-plot the ``speed`` column of the module-level ``df_gps`` table."""
    speed = df_gps['speed']
    speed.plot()


show_speed()  # todo: high pass filter
# ## MOTIONとGPSテーブルをマージして、speedと相関しそうな値を出してみる
#
# まずはgyroセンサーx,y,zのウィンドウ内平均から求めたベクトルの大きさ(ノルム)と、Speedが相関するかみてみる.
#
# 確かにスピードが大きいとき、gyroの値は小さいようにみえる。
# In[127]: | |
def add_window_index_to_motion_data(motion_df=None, gps_df=None):
    """Tag each motion row with the number of the GPS interval it falls in.

    Consecutive rows of the GPS table delimit windows: window ``i`` spans
    the row labels from GPS row ``i`` to GPS row ``i + 1``.  Motion rows
    whose label lies inside that span get ``window_index == i``; rows
    outside every window keep NaN.  The motion table is modified in place.

    Args:
        motion_df: Motion table to annotate.  Defaults to the module-level
            ``df_motion``.  Its index must be sorted so that label slices
            with bounds missing from the index are valid.
        gps_df: GPS table whose index labels delimit the windows.
            Defaults to the module-level ``df_gps``.

    Returns:
        None.
    """
    if motion_df is None:
        motion_df = df_motion
    if gps_df is None:
        gps_df = df_gps

    # Create the column up front so it exists even when there are fewer
    # than two GPS rows (i.e. no window at all).
    motion_df['window_index'] = np.nan

    boundaries = gps_df.index
    for i, (start, end) in enumerate(zip(boundaries[:-1], boundaries[1:])):
        # Label-based slice, inclusive at both ends; a row labelled ``end``
        # is reassigned to window ``i + 1`` on the next iteration.
        motion_df.loc[start:end, 'window_index'] = i
# Annotate the motion table only if it has no 'window_index' column yet.
if 'window_index' not in df_motion.columns:
    add_window_index_to_motion_data()
def show_speed_gyro_scatter():
    """Scatter-plot per-window gyro magnitude against GPS speed.

    For every window in the module-level ``df_motion`` table the mean of
    ``gyro_x``, ``gyro_y`` and ``gyro_z`` is taken and the Euclidean norm
    of that mean vector is plotted on the x axis.  The y axis is the
    ``speed`` of the GPS row (module-level ``df_gps``) that opens the
    window.
    """
    # Aggregate only the gyro columns; the table also holds a string column
    # that cannot be averaged.
    gyro_mean = df_motion.groupby('window_index')[['gyro_x', 'gyro_y', 'gyro_z']].mean()
    x = gyro_mean.gyro_x
    y = gyro_mean.gyro_y
    z = gyro_mean.gyro_z
    gyro_abs = np.sqrt(x * x + y * y + z * z)

    # Window i starts at the i-th GPS row.  Selecting by window number keeps
    # both series the same length even if some window has no motion rows.
    window_positions = np.asarray(gyro_mean.index, dtype=int)
    speed = df_gps.speed.iloc[window_positions]

    print('x: abs(gyro) ,y:speed [knots]')
    plt.scatter(gyro_abs, speed, alpha=.1)


show_speed_gyro_scatter()
# In[23]: | |
try:
    from pandas.plotting import scatter_matrix
except ImportError:  # old pandas releases that only ship pandas.tools
    from pandas.tools.plotting import scatter_matrix


def show_scatter_matrix():
    """Draw a scatter matrix of per-window motion medians and GPS speed.

    The medians of ``ay``, ``az``, ``gyro_x`` and ``gyro_z`` are computed
    per window from the module-level ``df_motion`` table, and the
    ``speed`` of the GPS row (module-level ``df_gps``) that opens each
    window is added as a fifth column.  The diagonal shows kernel density
    estimates.
    """
    cols = ['ay', 'az', 'gyro_x', 'gyro_z']
    # Aggregate only the plotted columns; the table also holds a string
    # column that has no median.
    medians = df_motion.groupby('window_index')[cols].median()

    # Attach speed by window position.  Joining on the index would pair
    # window numbers with unrelated GPS row labels.
    window_positions = np.asarray(medians.index, dtype=int)
    table = medians.assign(speed=df_gps.speed.iloc[window_positions].values)

    scatter_matrix(table, diagonal='kde')


# show_scatter_matrix()  # shows the distributions, but the matrix itself is hard to read
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment