Skip to content

Instantly share code, notes, and snippets.

@va2577
Created August 20, 2018 06:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save va2577/2ca817ca3d34fb1f6ab715bc967c9e10 to your computer and use it in GitHub Desktop.
Save va2577/2ca817ca3d34fb1f6ab715bc967c9e10 to your computer and use it in GitHub Desktop.
ヒストリカルデータ 2018/08/20
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Historical Data\n",
"\n",
"## データ\n",
"\n",
"```console\n",
"Time (UTC),Open,High,Low,Close,Volume\n",
"2003.05.04 21:00:00,118.94,118.952,118.94,118.952,253\n",
"2003.05.04 21:01:00,118.961,118.967,118.958,118.967,154.6\n",
"2003.05.04 21:02:00,118.972,118.972,118.955,118.955,219.7\n",
"2003.05.04 21:03:00,118.953,118.961,118.949,118.949,309.9\n",
"2003.05.04 21:04:00,118.953,118.953,118.946,118.946,229.4\n",
"2003.05.04 21:05:00,118.952,118.954,118.944,118.944,112.2\n",
"2003.05.04 21:06:00,118.95,118.952,118.945,118.945,170.2\n",
"2003.05.04 21:07:00,118.947,118.956,118.947,118.947,124.5\n",
"2003.05.04 21:08:00,118.946,118.954,118.934,118.934,355\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import calendar\n",
"import datetime\n",
"import functools\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data = [\n",
" 'AUDJPY',\n",
" 'AUDNZD',\n",
" 'AUDUSD',\n",
" 'CADJPY',\n",
" 'CHFJPY',\n",
" 'EURAUD',\n",
" 'EURGBP',\n",
" 'EURJPY',\n",
" 'EURUSD',\n",
" 'GBPAUD',\n",
" 'GBPJPY',\n",
" 'GBPUSD',\n",
" 'HKDJPY',\n",
" 'NZDJPY',\n",
" 'NZDUSD',\n",
" 'SGDJPY',\n",
" 'USDCAD',\n",
" 'USDCHF',\n",
" 'USDHKD',\n",
" 'USDJPY',\n",
" 'USDSGD'\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def read(filepath):\n",
" dtype = { 'time': str, 'open': float, 'high': float, 'low': float, 'close': float, 'volume': float }\n",
" names = ['time', 'open', 'high', 'low', 'close', 'volume']\n",
" df = pd.read_csv(filepath, dtype=dtype, header=0, index_col='time', names=names, parse_dates=['time'])\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def dstdt(year, month, week, hours):\n",
" day = [i for i in calendar.Calendar().itermonthdays2(year, month) if i[0] != 0 and i[1] == calendar.SUNDAY][week][0]\n",
" return datetime.datetime(year, month, day, 2, 0) + datetime.timedelta(hours=-hours)\n",
"\n",
"def dst(year):\n",
" dic = {\n",
" 1918: [(4, 0, -5), (10, -1, -4)],\n",
" 2007: [(3, 1, -5), (11, 0, -4)]\n",
" }\n",
" key = functools.reduce(lambda x, y: x if year < y else y, dic.keys(), 0)\n",
" if key == 0:\n",
" return []\n",
" return [dstdt(year, i[0], i[1], i[2]) for i in dic[key]]\n",
"\n",
"def offset(df):\n",
" data = { 'open': df['open'].values, 'high': df['high'].values, 'low': df['low'].values, 'close': df['close'].values, 'volume': df['volume'].values }\n",
" columns = ['open', 'high', 'low', 'close', 'volume']\n",
"\n",
"# index = df.index + pd.DateOffset(hours=3)\n",
"\n",
"# index = [i + pd.DateOffset(hours=(3 if i.tz_localize('UTC').tz_convert('America/New_York').dst().seconds > 0 else 2)) for i in df.index]\n",
"\n",
"# di = df.index.tz_localize('UTC').tz_convert('America/New_York')\n",
"# l = [i.dst().seconds > 0 for i in di]\n",
"# is_dst = pd.Series(l)\n",
"# eest = df.index[is_dst] + pd.DateOffset(hours=3)\n",
"# eet = df.index[~is_dst] + pd.DateOffset(hours=2)\n",
"# index = pd.concat([eest.to_series(), eet.to_series()])\n",
"\n",
" l = [dst(i) for i in range(2001, 2018)]\n",
" l2 = [(i[0] <= df.index) & (df.index < i[1]) for i in l]\n",
" is_dst = functools.reduce(lambda x, y: x | y, l2)\n",
" eest = df.index[is_dst] + pd.DateOffset(hours=3)\n",
" eet = df.index[~is_dst] + pd.DateOffset(hours=2)\n",
" index = pd.concat([eest.to_series(), eet.to_series()]).sort_values()\n",
"\n",
" df = pd.DataFrame(data=data, columns=columns, index=index)\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def resample(df, rule):\n",
" df2 = df.resample(rule).ohlc()\n",
" df3 = df.resample(rule).sum()\n",
" data = { 'open': df2['open']['open'].values, 'high': df2['high']['high'].values, 'low': df2['low']['low'].values, 'close': df2['close']['close'].values, 'volume': df3['volume'].values }\n",
" columns = ['open', 'high', 'low', 'close', 'volume']\n",
" df4 = pd.DataFrame(data=data, columns=columns, index=df2.index).dropna()\n",
" return df4"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def main(filepath, rule, decimals, to):\n",
" df = read(filepath)\n",
" df2 = offset(df)\n",
" df3 = resample(df2, rule)\n",
" df4 = df3.round({ 'open': decimals[0], 'high': decimals[0], 'low': decimals[0], 'close': decimals[0], 'volume': decimals[1] })\n",
" df4.to_csv(to)\n",
" return to, df4['close'].count()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2min 47s, sys: 36.1 s, total: 3min 23s\n",
"Wall time: 3min 29s\n"
]
},
{
"data": {
"text/plain": [
"[('~/Documents/data/d/AUDJPY_D.csv', 3677),\n",
" ('~/Documents/data/d/AUDNZD_D.csv', 2886),\n",
" ('~/Documents/data/d/AUDUSD_D.csv', 3762),\n",
" ('~/Documents/data/d/CADJPY_D.csv', 3442),\n",
" ('~/Documents/data/d/CHFJPY_D.csv', 3762),\n",
" ('~/Documents/data/d/EURAUD_D.csv', 3193),\n",
" ('~/Documents/data/d/EURGBP_D.csv', 3762),\n",
" ('~/Documents/data/d/EURJPY_D.csv', 3762),\n",
" ('~/Documents/data/d/EURUSD_D.csv', 3827),\n",
" ('~/Documents/data/d/GBPAUD_D.csv', 3075),\n",
" ('~/Documents/data/d/GBPJPY_D.csv', 3762),\n",
" ('~/Documents/data/d/GBPUSD_D.csv', 3827),\n",
" ('~/Documents/data/d/HKDJPY_D.csv', 2821),\n",
" ('~/Documents/data/d/NZDJPY_D.csv', 3131),\n",
" ('~/Documents/data/d/NZDUSD_D.csv', 3762),\n",
" ('~/Documents/data/d/SGDJPY_D.csv', 2821),\n",
" ('~/Documents/data/d/USDCAD_D.csv', 3762),\n",
" ('~/Documents/data/d/USDCHF_D.csv', 3827),\n",
" ('~/Documents/data/d/USDHKD_D.csv', 2821),\n",
" ('~/Documents/data/d/USDJPY_D.csv', 3827),\n",
" ('~/Documents/data/d/USDSGD_D.csv', 3426)]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# data = ['AUDJPY']\n",
"%time [main(f'~/Documents/data/{i}_1 Min_Bid_2000.01.01_2018.01.01.csv', 'd', (3 if 'JPY' in i else 5, 1), f'~/Documents/data/d/{i}_D.csv') for i in data]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3min 26s, sys: 37.5 s, total: 4min 3s\n",
"Wall time: 4min 9s\n"
]
},
{
"data": {
"text/plain": [
"[('~/Documents/data/h/AUDJPY_H.csv', 88226),\n",
" ('~/Documents/data/h/AUDNZD_H.csv', 69240),\n",
" ('~/Documents/data/h/AUDUSD_H.csv', 90266),\n",
" ('~/Documents/data/h/CADJPY_H.csv', 82583),\n",
" ('~/Documents/data/h/CHFJPY_H.csv', 90266),\n",
" ('~/Documents/data/h/EURAUD_H.csv', 76602),\n",
" ('~/Documents/data/h/EURGBP_H.csv', 90266),\n",
" ('~/Documents/data/h/EURJPY_H.csv', 90266),\n",
" ('~/Documents/data/h/EURUSD_H.csv', 91826),\n",
" ('~/Documents/data/h/GBPAUD_H.csv', 73776),\n",
" ('~/Documents/data/h/GBPJPY_H.csv', 90266),\n",
" ('~/Documents/data/h/GBPUSD_H.csv', 91826),\n",
" ('~/Documents/data/h/HKDJPY_H.csv', 67675),\n",
" ('~/Documents/data/h/NZDJPY_H.csv', 75122),\n",
" ('~/Documents/data/h/NZDUSD_H.csv', 90266),\n",
" ('~/Documents/data/h/SGDJPY_H.csv', 67675),\n",
" ('~/Documents/data/h/USDCAD_H.csv', 90266),\n",
" ('~/Documents/data/h/USDCHF_H.csv', 91826),\n",
" ('~/Documents/data/h/USDHKD_H.csv', 67675),\n",
" ('~/Documents/data/h/USDJPY_H.csv', 91826),\n",
" ('~/Documents/data/h/USDSGD_H.csv', 82182)]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# data = ['AUDJPY']\n",
"%time [main(f'~/Documents/data/{i}_1 Min_Bid_2000.01.01_2018.01.01.csv', 'h', (3 if 'JPY' in i else 5, 1), f'~/Documents/data/h/{i}_H.csv') for i in data]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment