Created
August 20, 2018 06:22
-
-
Save va2577/2ca817ca3d34fb1f6ab715bc967c9e10 to your computer and use it in GitHub Desktop.
ヒストリカルデータ 2018/08/20
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Historical Data\n", | |
"\n", | |
"## データ\n", | |
"\n", | |
"```console\n", | |
"Time (UTC),Open,High,Low,Close,Volume\n", | |
"2003.05.04 21:00:00,118.94,118.952,118.94,118.952,253\n", | |
"2003.05.04 21:01:00,118.961,118.967,118.958,118.967,154.6\n", | |
"2003.05.04 21:02:00,118.972,118.972,118.955,118.955,219.7\n", | |
"2003.05.04 21:03:00,118.953,118.961,118.949,118.949,309.9\n", | |
"2003.05.04 21:04:00,118.953,118.953,118.946,118.946,229.4\n", | |
"2003.05.04 21:05:00,118.952,118.954,118.944,118.944,112.2\n", | |
"2003.05.04 21:06:00,118.95,118.952,118.945,118.945,170.2\n", | |
"2003.05.04 21:07:00,118.947,118.956,118.947,118.947,124.5\n", | |
"2003.05.04 21:08:00,118.946,118.954,118.934,118.934,355\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import calendar\n", | |
"import datetime\n", | |
"import functools\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = [\n", | |
" 'AUDJPY',\n", | |
" 'AUDNZD',\n", | |
" 'AUDUSD',\n", | |
" 'CADJPY',\n", | |
" 'CHFJPY',\n", | |
" 'EURAUD',\n", | |
" 'EURGBP',\n", | |
" 'EURJPY',\n", | |
" 'EURUSD',\n", | |
" 'GBPAUD',\n", | |
" 'GBPJPY',\n", | |
" 'GBPUSD',\n", | |
" 'HKDJPY',\n", | |
" 'NZDJPY',\n", | |
" 'NZDUSD',\n", | |
" 'SGDJPY',\n", | |
" 'USDCAD',\n", | |
" 'USDCHF',\n", | |
" 'USDHKD',\n", | |
" 'USDJPY',\n", | |
" 'USDSGD'\n", | |
"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def read(filepath):\n", | |
" dtype = { 'time': str, 'open': float, 'high': float, 'low': float, 'close': float, 'volume': float }\n", | |
" names = ['time', 'open', 'high', 'low', 'close', 'volume']\n", | |
" df = pd.read_csv(filepath, dtype=dtype, header=0, index_col='time', names=names, parse_dates=['time'])\n", | |
" return df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def dstdt(year, month, week, hours):\n", | |
" day = [i for i in calendar.Calendar().itermonthdays2(year, month) if i[0] != 0 and i[1] == calendar.SUNDAY][week][0]\n", | |
" return datetime.datetime(year, month, day, 2, 0) + datetime.timedelta(hours=-hours)\n", | |
"\n", | |
"def dst(year):\n", | |
" dic = {\n", | |
" 1918: [(4, 0, -5), (10, -1, -4)],\n", | |
" 2007: [(3, 1, -5), (11, 0, -4)]\n", | |
" }\n", | |
" key = functools.reduce(lambda x, y: x if year < y else y, dic.keys(), 0)\n", | |
" if key == 0:\n", | |
" return []\n", | |
" return [dstdt(year, i[0], i[1], i[2]) for i in dic[key]]\n", | |
"\n", | |
"def offset(df):\n", | |
" data = { 'open': df['open'].values, 'high': df['high'].values, 'low': df['low'].values, 'close': df['close'].values, 'volume': df['volume'].values }\n", | |
" columns = ['open', 'high', 'low', 'close', 'volume']\n", | |
"\n", | |
"# index = df.index + pd.DateOffset(hours=3)\n", | |
"\n", | |
"# index = [i + pd.DateOffset(hours=(3 if i.tz_localize('UTC').tz_convert('America/New_York').dst().seconds > 0 else 2)) for i in df.index]\n", | |
"\n", | |
"# di = df.index.tz_localize('UTC').tz_convert('America/New_York')\n", | |
"# l = [i.dst().seconds > 0 for i in di]\n", | |
"# is_dst = pd.Series(l)\n", | |
"# eest = df.index[is_dst] + pd.DateOffset(hours=3)\n", | |
"# eet = df.index[~is_dst] + pd.DateOffset(hours=2)\n", | |
"# index = pd.concat([eest.to_series(), eet.to_series()])\n", | |
"\n", | |
" l = [dst(i) for i in range(2001, 2018)]\n", | |
" l2 = [(i[0] <= df.index) & (df.index < i[1]) for i in l]\n", | |
" is_dst = functools.reduce(lambda x, y: x | y, l2)\n", | |
" eest = df.index[is_dst] + pd.DateOffset(hours=3)\n", | |
" eet = df.index[~is_dst] + pd.DateOffset(hours=2)\n", | |
" index = pd.concat([eest.to_series(), eet.to_series()]).sort_values()\n", | |
"\n", | |
" df = pd.DataFrame(data=data, columns=columns, index=index)\n", | |
" return df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def resample(df, rule):\n", | |
" df2 = df.resample(rule).ohlc()\n", | |
" df3 = df.resample(rule).sum()\n", | |
" data = { 'open': df2['open']['open'].values, 'high': df2['high']['high'].values, 'low': df2['low']['low'].values, 'close': df2['close']['close'].values, 'volume': df3['volume'].values }\n", | |
" columns = ['open', 'high', 'low', 'close', 'volume']\n", | |
" df4 = pd.DataFrame(data=data, columns=columns, index=df2.index).dropna()\n", | |
" return df4" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def main(filepath, rule, decimals, to):\n", | |
" df = read(filepath)\n", | |
" df2 = offset(df)\n", | |
" df3 = resample(df2, rule)\n", | |
" df4 = df3.round({ 'open': decimals[0], 'high': decimals[0], 'low': decimals[0], 'close': decimals[0], 'volume': decimals[1] })\n", | |
" df4.to_csv(to)\n", | |
" return to, df4['close'].count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 2min 47s, sys: 36.1 s, total: 3min 23s\n", | |
"Wall time: 3min 29s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[('~/Documents/data/d/AUDJPY_D.csv', 3677),\n", | |
" ('~/Documents/data/d/AUDNZD_D.csv', 2886),\n", | |
" ('~/Documents/data/d/AUDUSD_D.csv', 3762),\n", | |
" ('~/Documents/data/d/CADJPY_D.csv', 3442),\n", | |
" ('~/Documents/data/d/CHFJPY_D.csv', 3762),\n", | |
" ('~/Documents/data/d/EURAUD_D.csv', 3193),\n", | |
" ('~/Documents/data/d/EURGBP_D.csv', 3762),\n", | |
" ('~/Documents/data/d/EURJPY_D.csv', 3762),\n", | |
" ('~/Documents/data/d/EURUSD_D.csv', 3827),\n", | |
" ('~/Documents/data/d/GBPAUD_D.csv', 3075),\n", | |
" ('~/Documents/data/d/GBPJPY_D.csv', 3762),\n", | |
" ('~/Documents/data/d/GBPUSD_D.csv', 3827),\n", | |
" ('~/Documents/data/d/HKDJPY_D.csv', 2821),\n", | |
" ('~/Documents/data/d/NZDJPY_D.csv', 3131),\n", | |
" ('~/Documents/data/d/NZDUSD_D.csv', 3762),\n", | |
" ('~/Documents/data/d/SGDJPY_D.csv', 2821),\n", | |
" ('~/Documents/data/d/USDCAD_D.csv', 3762),\n", | |
" ('~/Documents/data/d/USDCHF_D.csv', 3827),\n", | |
" ('~/Documents/data/d/USDHKD_D.csv', 2821),\n", | |
" ('~/Documents/data/d/USDJPY_D.csv', 3827),\n", | |
" ('~/Documents/data/d/USDSGD_D.csv', 3426)]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# data = ['AUDJPY']\n", | |
"%time [main(f'~/Documents/data/{i}_1 Min_Bid_2000.01.01_2018.01.01.csv', 'd', (3 if 'JPY' in i else 5, 1), f'~/Documents/data/d/{i}_D.csv') for i in data]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 3min 26s, sys: 37.5 s, total: 4min 3s\n", | |
"Wall time: 4min 9s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[('~/Documents/data/h/AUDJPY_H.csv', 88226),\n", | |
" ('~/Documents/data/h/AUDNZD_H.csv', 69240),\n", | |
" ('~/Documents/data/h/AUDUSD_H.csv', 90266),\n", | |
" ('~/Documents/data/h/CADJPY_H.csv', 82583),\n", | |
" ('~/Documents/data/h/CHFJPY_H.csv', 90266),\n", | |
" ('~/Documents/data/h/EURAUD_H.csv', 76602),\n", | |
" ('~/Documents/data/h/EURGBP_H.csv', 90266),\n", | |
" ('~/Documents/data/h/EURJPY_H.csv', 90266),\n", | |
" ('~/Documents/data/h/EURUSD_H.csv', 91826),\n", | |
" ('~/Documents/data/h/GBPAUD_H.csv', 73776),\n", | |
" ('~/Documents/data/h/GBPJPY_H.csv', 90266),\n", | |
" ('~/Documents/data/h/GBPUSD_H.csv', 91826),\n", | |
" ('~/Documents/data/h/HKDJPY_H.csv', 67675),\n", | |
" ('~/Documents/data/h/NZDJPY_H.csv', 75122),\n", | |
" ('~/Documents/data/h/NZDUSD_H.csv', 90266),\n", | |
" ('~/Documents/data/h/SGDJPY_H.csv', 67675),\n", | |
" ('~/Documents/data/h/USDCAD_H.csv', 90266),\n", | |
" ('~/Documents/data/h/USDCHF_H.csv', 91826),\n", | |
" ('~/Documents/data/h/USDHKD_H.csv', 67675),\n", | |
" ('~/Documents/data/h/USDJPY_H.csv', 91826),\n", | |
" ('~/Documents/data/h/USDSGD_H.csv', 82182)]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# data = ['AUDJPY']\n", | |
"%time [main(f'~/Documents/data/{i}_1 Min_Bid_2000.01.01_2018.01.01.csv', 'h', (3 if 'JPY' in i else 5, 1), f'~/Documents/data/h/{i}_H.csv') for i in data]" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment