Last active
November 21, 2017 07:20
-
-
Save puyokw/5255b0286c34868f2bca to your computer and use it in GitHub Desktop.
td_intern rossmann
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib inline | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import matplotlib.dates | |
import datetime | |
import pandas_td as td | |
import os | |
# Connect to Treasure Data; the API key is read from the TD_API_KEY
# environment variable (a missing variable raises KeyError here).
con = td.connect(apikey=os.environ['TD_API_KEY'], endpoint='https://api.treasuredata.com/')
engine = con.query_engine(database='rossmann', type='presto')

# read_td_table has a default limit of 10,000 rows, so a larger limit is
# passed explicitly for the training table.
train = td.read_td_table('train_original', engine, limit=1017209)
train.head()

# NOTE(review): no limit is passed for the next two tables, so the default
# 10,000-row limit applies -- confirm both tables fit within it.
test = td.read_td_table('test_original', engine)
test.head()
store = td.read_td_table('store_raw', engine)
store.head()
# Convert the date strings to matplotlib date numbers (floats) for plotting.
datetimes = [datetime.datetime.strptime(t, "%Y-%m-%d") for t in train.date]
plotData = pd.DataFrame(matplotlib.dates.date2num(datetimes), columns=['datetimes'])

# Assign by position rather than joining on the index: a join raises a
# column-overlap error if this cell is re-run, and it only lines rows up
# correctly while train still has its default 0..n-1 index.
train['datetimes'] = plotData['datetimes'].values
def splitTime(x):
    """Split a date string into its year, month and day numbers.

    Args:
        x: Date string in ``%Y-%m-%d`` form, e.g. ``"2014-11-12"``.

    Returns:
        A three-element list ``[year, month, day]`` of integers.

    Raises:
        ValueError: If ``x`` does not match the ``%Y-%m-%d`` format
            (raised by ``datetime.datetime.strptime``).
    """
    mysplit = datetime.datetime.strptime(x, "%Y-%m-%d")
    return [mysplit.year, mysplit.month, mysplit.day]
# Expand each date into integer columns,
# e.g. 2014-11-12 -> year=2014, mon=11, day=12.
date_parts = pd.DataFrame(train.date.apply(splitTime).tolist(), columns=['year', 'mon', 'day'])

# Assign by position rather than joining on the index: a join raises a
# column-overlap error if this cell is re-run, and it only lines rows up
# correctly while train still has its default 0..n-1 index.
for part_name in date_parts.columns:
    train[part_name] = date_parts[part_name].values
# Visualize: plot the sales of store 1 against date and save the figure.
# DataFrame.sort has been removed from pandas; sort_values is its replacement.
train = train.sort_values(['datetimes'])

# Select the store-1 rows once instead of repeating the mask per axis.
store1 = train.loc[train.store == 1]

plt.figure(1, figsize=(20, 10))
# NOTE(review): plot_date is deprecated in recent matplotlib releases --
# consider plt.plot on a date axis when upgrading.
plt.plot_date(store1['datetimes'], store1['sales'], linestyle='-')
plt.title('store 1')
plt.savefig('sales_store1.png')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Comments from one of our customers:
■実際のデータにはミリ秒が含まれていたので、カットする記述を追加
■結合時に「重複しているよ」という警告が出たので、サフィックスを追加
■条件が文字列で指定していましたが数値なのでシングルクオテーションを外す