Skip to content

Instantly share code, notes, and snippets.

@Hio-Been
Last active May 28, 2016 12:12
Show Gist options
  • Save Hio-Been/faba03d188452981c1ed to your computer and use it in GitHub Desktop.
Save Hio-Been/faba03d188452981c1ed to your computer and use it in GitHub Desktop.
#! -*-coding: utf-8 -*-
__author__ = 'Hio-been'
import pandas.io.data as web
from datetime import datetime
import matplotlib.pyplot as plt
import os, sys; reload(sys); sys.setdefaultencoding('utf-8')
import numpy as np
#https://gist.github.com/Han-Hiobeen/faba03d188452981c1ed
# Name of the remote data source handed to web.DataReader.
data_source = 'yahoo'

# stock_list maps a display name to the ticker symbol requested from
# data_source. Any set of ticker codes can be put here.
# Alternative universe that was used before (kept for reference):
#     'Samsung Life Insurance'  : '032830.KS',
#     'Samsung Electronics'     : '005930.KS',
#     'Posco'                   : '005490.KS',
#     'Hana Financial Group'    : '086790.KS',
#     'SK Hynix'                : '000660.KS',
#     'Hyundai Mobis'           : '012330.KS',
#     'Hyundai Motor'           : '005380.KS',
#     'Shinhan Financial Group' : '055550.KS',
#     'KB Financial Group'      : '105560.KS',
#     'KIA Motors'              : '000270.KS',
stock_list = {
    'Skyworks Solutions Inc': 'SWKS',
    'SK Telecom': 'SKM',
    'Sketchers USA': 'SKX',
    'Amorepacific': '090430.KS',
    'Tanger Factory Outlet': 'SKT',
    'DreamWorks Animations': 'DWA',
    'SkyWest Inc': 'SKYW',
    'Ingram Micro Inc': 'IM',
    'Microsoft': 'MSFT',
    'Apple Inc': 'AAPL',
}

# Download window; any number of years is fine.
# Plain decimal literals are used on purpose: a leading zero (05, 04, 01) is an
# octal literal in Python 2 and a syntax error in Python 3.
start = datetime(2000, 5, 1)
end = datetime(2015, 4, 30)

# The three CSV files below stay open while the companies are processed and
# are closed later in the script, after the risk/return scatter plot.

# Monthly results.
output_file_name_1 = 'result_1_monthly.csv'
output_file_1 = open(output_file_name_1, 'w')
output_file_1.write('period,company,start_price,end_price,return_rate\n')

# Yearly results.
output_file_name_2 = 'result_2_yearly.csv'
output_file_2 = open(output_file_name_2, 'w')
output_file_2.write('period,company,avg_returns,std_returns\n')

# Whole-period results.
output_file_name_3 = 'result_3_total.csv'
output_file_3 = open(output_file_name_3, 'w')
output_file_3.write('company,return_avg,return_std\n')

pic_file_name = 'result_5.png'
# 0: one price chart per company, 1: all companies as subplots of one figure.
way_to_draw = 1

# Basic variables and lists used during data processing.
years_list = ['2010', '2011', '2012', '2013', '2014', '2015']
months_list = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
global_company_name_list, global_return_list, global_risk_list, yearly_data_list = [], [], [], []
fig1 = plt.figure()
sub1 = fig1.add_subplot(111)

# Optional dialog for editing stock_list before the run (disabled):
# try:
#     from psychopy import gui, core, event
#     dlg = gui.DlgFromDict(dictionary=stock_list, title=__author__)
#     if dlg.OK == False: core.quit()
# except:
#     pass

# Loop structure
#   (1) the outer loop runs once per company,
#   (2) inside it the years are visited in order, starting with 2010,
#   (3) inside each year the months are visited in order, starting with 01 (daily data)
#       > monthly figures are processed / saved (monthly)
#       > yearly figures are processed / saved (yearly)
#       > the per-company average over the whole period is saved (total)
# One result list per company. Element 0 of every list is the company name and
# the numeric values follow it. The two containers hold *separate* lists, so
# yearly means (yearly_corps) and monthly returns (monthly_corps) are never
# mixed together in one list.
company_count = len(stock_list)
yearly_corps = [[] for _ in range(company_count)]
monthly_corps = [[] for _ in range(company_count)]
# Layout for way_to_draw == 1: two columns and enough rows for every company.
subplot_rows = max(1, (company_count + 1) // 2)

for company_index, (company_name, company_code) in enumerate(stock_list.items()):
    yearly_corps[company_index].append(company_name)
    monthly_corps[company_index].append(company_name)
    print('loop index %s : data processing for %s ' % (company_index + 1, company_name))
    # Fetch the data for company_code from data_source, covering start..end.
    full_data = web.DataReader(company_code, data_source, start, end)
    monthly_return_list = []  # every monthly return of this company
    dates_list = []           # 'YYYY-MM' label of every month that produced a return
    for year in years_list:
        yearly_return_list = []  # monthly returns that belong to this year only
        for month in months_list:
            date = '%s-%s' % (year, month)
            try:
                # Adjusted close prices of this month only.
                monthly_data = full_data[date]['Adj Close']
                # First and last value of the month.
                monthly_start_price = float(monthly_data[0])
                monthly_ending_price = float(monthly_data[-1])
                # Monthly return = (last - first) / first.
                monthly_return = (monthly_ending_price - monthly_start_price) / monthly_start_price
            except (KeyError, IndexError, ZeroDivisionError):
                # No rows for this month (outside the downloaded period) or an
                # unusable opening price: skip the month.
                continue
            monthly_return_list.append(monthly_return)
            yearly_return_list.append(monthly_return)
            # One line per month in the monthly CSV file.
            output_file_1.write('%s,%s,%.6f,%.6f,%.6f\n' % (
                date, company_name, monthly_start_price, monthly_ending_price, monthly_return))
            monthly_corps[company_index].append(monthly_return)
            dates_list.append(date)
        # All twelve months visited: summarise this year from its own months.
        if yearly_return_list:
            yearly_mean_return = np.mean(yearly_return_list)
            yearly_mean_risk = np.std(yearly_return_list)
        else:
            # A year without any data has no defined mean / deviation.
            yearly_mean_return = yearly_mean_risk = float('nan')
        yearly_corps[company_index].append(yearly_mean_return)
        output_file_2.write('%s,%s,%.6f,%.6f\n' % (
            year, company_name, yearly_mean_return, yearly_mean_risk))
    # Mean and standard deviation over every monthly return of the company.
    total_mean_return, total_mean_risk = np.mean(monthly_return_list), np.std(monthly_return_list)
    output_file_3.write('%s,%.6f,%.6f\n' % (company_name, total_mean_return, total_mean_risk))
    # Lists used later for the risk/return scatter plot.
    global_company_name_list.append(company_name)
    global_return_list.append(total_mean_return)
    global_risk_list.append(total_mean_risk)
    if way_to_draw == 0:
        # One separate chart per company.
        single_plot = full_data[['Adj Close']].plot(grid=True, figsize=(10, 8))
        single_plot.set_title(company_name)
    elif way_to_draw == 1:
        # All companies squeezed into one figure. Subplot positions are
        # 1-based, hence company_index + 1.
        subplot = fig1.add_subplot(subplot_rows, 2, company_index + 1)
        subplot.plot(full_data.index, full_data['Adj Close'])
        fig1.subplots_adjust(left=.05, bottom=.05, right=.96, top=.9, wspace=.11, hspace=0.65)
        subplot.set_title(company_name)
print('Import process done')
plt.show()
# Scatter plot of every company's risk (x axis) against its return (y axis),
# with the company name attached to each marker.
plt.scatter(global_risk_list, global_return_list, marker='o')
for label, x, y in zip(global_company_name_list, global_risk_list, global_return_list):
    # Fresh style dictionaries are built for every label.
    annotation_style = {
        'xy': (x, y),
        'xytext': (-20, 20),
        'textcoords': 'offset points',
        'ha': 'left',
        'va': 'bottom',
        'bbox': {'boxstyle': 'round, pad=0.66', 'fc': 'blue', 'alpha': 0.33},
        'arrowprops': {'arrowstyle': '->', 'connectionstyle': 'arc3, rad=0'},
    }
    plt.annotate(label, **annotation_style)
plt.xlabel('5 Year Total Standard Deviation')
plt.ylabel('5 Year Total Average Return')
plt.show()

# The monthly, yearly and total CSV files are complete at this point.
for finished_file in (output_file_1, output_file_2, output_file_3):
    finished_file.close()
# Opening the result files through os.system (for example
# os.system('%s' % output_file_name_1)) is intentionally left disabled.

# Yearly data export.
# Write result_4.csv: one row per company holding every entry of its
# yearly_corps list followed by the mean and standard deviation of the entries
# after the first one. The same entries (without the first) are collected in
# idle_list_for_corr_mtx.
idle_list_for_corr_mtx, idle_list_for_ret_calculation = [], []
output_file_name_4 = 'result_4.csv'
# The with-statement makes sure the file is flushed and closed; it used to be
# left open for the rest of the run.
with open(output_file_name_4, 'w') as output_file_4:
    output_file_4.write('Company')
    for years in years_list:
        output_file_4.write(',%s' % years)
    output_file_4.write(',Avg,Std\n')
    for yearly_list in yearly_corps:
        for contents in yearly_list:
            try:
                output_file_4.write(',%.6f' % contents)
            except TypeError:
                # Not a number (the company name): written as-is, without a
                # leading comma.
                output_file_4.write('%s' % contents)
        try:
            summary = (np.mean(yearly_list[1:]), np.std(yearly_list[1:]))
        except (TypeError, ValueError):
            # Fallback kept from the original: skip one more leading entry when
            # the slice still contains something non-numeric.
            summary = (np.mean(yearly_list[2:]), np.std(yearly_list[2:]))
        output_file_4.write(',%.6f,%.6f\n' % summary)
        idle_list_for_corr_mtx.append(yearly_list[1:])

# Monthly data export.
# Write result_6.csv: a 'Date' column followed by one column per company.
# Entry 0 of each monthly_corps list supplies the column title; entry k
# supplies the value for the k-th label in dates_list.
output_file_name_6 = 'result_6.csv'
output_file_6 = open(output_file_name_6, 'w')

header_cells = ['Date']
for company_column in monthly_corps:
    header_cells.append('%s' % company_column[0])
output_file_6.write(','.join(header_cells))
output_file_6.write('\n')

# Positions start at 1 because position 0 of every column is its title.
for position, period_label in enumerate(dates_list, 1):
    output_file_6.write('%s' % period_label)
    for company_column in monthly_corps:
        output_file_6.write(',%s' % company_column[position])
    output_file_6.write('\n')

output_file_6.close()
# Hands the bare file name to the system shell.
os.system('%s' % output_file_name_6)
# os.system('open %s' % output_file_name_4) is left disabled.
# Correlation coefficients between the rows collected in
# idle_list_for_corr_mtx, shown as an image with a colour bar.
corr_matrix = np.corrcoef(idle_list_for_corr_mtx)
plt.imshow(corr_matrix)
plt.title('Correlation Table between 10 Corps.')
plt.colorbar()
plt.show()

# result_5.csv receives one row per simulated portfolio; its header is
# 'X,Y' followed by one column per key of stock_list.
output_file_name_5 = 'result_5.csv'
output_file_5 = open(output_file_name_5, 'w')
header_cells = ['X', 'Y'] + ['%s' % corps for corps in stock_list]
output_file_5.write(','.join(header_cells) + '\n')

plt.show()
x_list, y_list = [], []
# Interactive mode is switched on before the simulation starts plotting.
plt.ion()
def get_answer(weight_array, corr=None, series=None):
    """Return ``(volatility, return)`` for one vector of portfolio weights.

    weight_array -- 1-D numpy array with one weight per company.
    corr         -- square matrix used in the quadratic form
                    ``w.T . corr . w``; defaults to the module-level
                    ``corr_matrix``.
    series       -- one sequence per company whose first entry is skipped and
                    whose remaining entries are averaged; defaults to the
                    module-level ``yearly_corps``.

    The return figure is the sum over all weights of
    ``weight * mean(series[i][1:])``.
    Raises IndexError when there are more weights than entries in ``series``.
    """
    if corr is None:
        corr = corr_matrix
    if series is None:
        series = yearly_corps
    answer_vol = weight_array.T.dot(corr).dot(weight_array)
    answer_ret = sum(
        weight * np.mean(series[weight_index][1:])
        for weight_index, weight in enumerate(weight_array)
    )
    return answer_vol, answer_ret
def random_sampling(sampling_number):
    """Simulate ``sampling_number`` random portfolios and plot/log each one.

    For every sample a random weight vector (one weight per key of
    ``stock_list``) is drawn and normalised so that the weights sum to 1.
    ``get_answer`` turns it into a (risk, return) pair, the point is drawn on
    the module-level axes ``sub3`` and a row ``risk,return,weights...`` is
    appended to the module-level file ``output_file_5``. The return is scaled
    by 100 so it is plotted and logged as a percentage. Marker colour runs
    from blue (first sample) towards red (last sample).
    """
    company_count = len(stock_list)
    # Axis decoration does not change between samples, so it is set once.
    plt.xlabel('Risk')
    plt.ylabel('Return ( Unit : % )')
    plt.grid(color='k', linestyle='-', linewidth=.2)
    for i in range(sampling_number):
        # Normalise by the sum of the SAME draw; dividing by the sum of a
        # second, independent draw leaves weights that do not add up to 1.
        raw_weights = np.random.random(company_count)
        random_array = raw_weights / raw_weights.sum()
        x, y = get_answer(random_array)
        # Unit option: express the return as a percentage.
        y = 100 * y
        progress = float(i) / float(sampling_number)
        current_color = [progress, 0, 1 - progress]
        sub3.scatter(x, y, c=current_color, marker='x', linewidths=3)
        # Very short pause so the interactive figure gets redrawn.
        plt.pause(0.000001)
        plt.show()
        plt.title('Simulation with randomly generated weights' + '\n\n' +
                  'nSample = %s / %s' % (i, sampling_number))
        row_cells = ['%.6f,%.6f' % (x, y)]
        row_cells.extend('%s' % parameters for parameters in random_array)
        output_file_5.write(','.join(row_cells) + '\n')
# Run the random-weight simulation on its own figure; random_sampling draws on
# the module-level axes sub3 and writes its rows to output_file_5.
fig3 = plt.figure()
sub3 = fig3.add_subplot(111)
random_sampling(500)
output_file_5.close()
# The figure is saved once (it used to be written twice in a row).
plt.savefig(pic_file_name)
# Two attempts to display the picture: the bare file name is handed to the
# shell first, then the 'open' command is tried.
os.system('%s' % pic_file_name)
print('Analysis done')
os.system('open %s' % pic_file_name)
sys.exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment