Last active
May 28, 2016 12:12
-
-
Save Hio-Been/faba03d188452981c1ed to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! -*-coding: utf-8 -*- | |
__author__ = 'Hio-been' | |
import pandas.io.data as web | |
from datetime import datetime | |
import matplotlib.pyplot as plt | |
import os, sys; reload(sys); sys.setdefaultencoding('utf-8') | |
import numpy as np | |
#https://gist.github.com/Han-Hiobeen/faba03d188452981c1ed | |
# Name of the data provider handed to web.DataReader.
data_source = 'yahoo'

# stock_list maps a display name to the ticker code requested from the data
# source.  Any set of tickers can be used.  An alternative universe that was
# kept for reference:
#     'Samsung Life Insurance'  : '032830.KS',
#     'Samsung Electronics'     : '005930.KS',
#     'Posco'                   : '005490.KS',
#     'Hana Financial Group'    : '086790.KS',
#     'SK Hynix'                : '000660.KS',
#     'Hyundai Mobis'           : '012330.KS',
#     'Hyundai Motor'           : '005380.KS',
#     'Shinhan Financial Group' : '055550.KS',
#     'KB Financial Group'      : '105560.KS',
#     'KIA Motors'              : '000270.KS',
stock_list = {
    'Skyworks Solutions Inc': 'SWKS',
    'SK Telecom': 'SKM',
    'Sketchers USA': 'SKX',
    'Amorepacific': '090430.KS',
    'Tanger Factory Outlet': 'SKT',
    'DreamWorks Animations': 'DWA',
    'SkyWest Inc': 'SKYW',
    'Ingram Micro Inc': 'IM',
    'Microsoft': 'MSFT',
    'Apple Inc': 'AAPL',
}

# Download window; any number of years works.
# The literals are plain decimals on purpose: zero-prefixed integers such as
# ``05`` are octal in Python 2 (so ``08`` and ``09`` do not even parse) and a
# SyntaxError in Python 3.
start = datetime(2000, 5, 1)
end = datetime(2015, 4, 30)
""" 월별 데이터 산출 """ | |
# Output locations: monthly rows, yearly rows, whole-period rows, and the
# picture saved at the end of the script.
output_file_name_1 = 'result_1_monthly.csv'
output_file_name_2 = 'result_2_yearly.csv'
output_file_name_3 = 'result_3_total.csv'
pic_file_name = 'result_5.png'

# Every CSV is opened for writing and receives its header row right away.
# The handles stay open because later code appends the data rows.
output_file_1 = open(output_file_name_1, 'w')
output_file_1.write('period,company,start_price,end_price,return_rate\n')

output_file_2 = open(output_file_name_2, 'w')
output_file_2.write('period,company,avg_returns,std_returns\n')

output_file_3 = open(output_file_name_3, 'w')
output_file_3.write('company,return_avg,return_std\n')

# Layout of the price charts drawn later in the script:
# 0 -> one figure per company, 1 -> every company as a subplot of fig1.
way_to_draw = 1

# Calendar keys used to build the 'YYYY-MM' labels for the monthly slices.
years_list = list(map(str, range(2010, 2016)))
months_list = list(map('{0:02d}'.format, range(1, 13)))

# Accumulators filled while the companies are processed.
global_company_name_list = []
global_return_list = []
global_risk_list = []
yearly_data_list = []

# Figure that hosts the per-company price subplots.
fig1 = plt.figure()
sub1 = fig1.add_subplot(111)
""" | |
try: | |
from psychopy import gui, core, event | |
dlg = gui.DlgFromDict(dictionary=stock_list, title=__author__) | |
if dlg.OK == False: core.quit() | |
except: | |
pass | |
""" | |
""" 루프 구조 | |
(1) Company 별로 크게 루프를 돌리되, | |
(2) 2010년부터 연도별로 순차적으로 돌고, | |
(3) 다시 그 안에서는 01월부터 순차적으로 돌면서 (daily) | |
> 월별 데이터 처리 / 저장 (monthly) | |
> 연도별 데이터 처리 / 저장 (yearly) | |
> 회사별 5년 평균 데이터 저장 (total) | |
""" | |
def _mean_and_std(values):
    """Return (mean, population std) of values, or (nan, nan) when empty."""
    if len(values) == 0:
        return float('nan'), float('nan')
    return np.mean(values), np.std(values)


# One record per company, in stock_list iteration order.  Every record is
# [company name, value, value, ...]: yearly mean returns in yearly_corps,
# monthly returns in monthly_corps.  The two containers hold DISTINCT list
# objects; sharing one list between them would interleave monthly returns and
# yearly means inside the same record.
yearly_corps = [[] for _ in stock_list]
monthly_corps = [[] for _ in stock_list]

# Two subplot columns; enough rows for every company (5 rows for 10 tickers).
subplot_rows = (len(stock_list) + 1) // 2

for company_index, (company_name, company_code) in enumerate(list(stock_list.items())):
    yearly_corps[company_index].append(company_name)
    monthly_corps[company_index].append(company_name)
    print('loop index %s : data processing for %s ' % (company_index + 1, company_name))

    # Download the price history for company_code between start and end.
    full_data = web.DataReader(company_code, data_source, start, end)

    monthly_return_list = []  # every monthly return of this company
    # Rebuilt for each company; after the loop it holds the months of the
    # last company processed.
    dates_list = []

    for year in years_list:
        year_returns = []  # monthly returns that belong to this year only
        for month in months_list:
            date = '%s-%s' % (year, month)
            try:
                # Adjusted closes of this month; first and last trading day.
                monthly_data = full_data[date]['Adj Close']
                monthly_start_price = float(monthly_data.iloc[0])
                monthly_ending_price = float(monthly_data.iloc[-1])
                # Return of the month: (end - start) / start.
                monthly_return = (monthly_ending_price - monthly_start_price) / monthly_start_price
            except (KeyError, IndexError, ZeroDivisionError):
                # No usable rows for this month (for example a month outside
                # the downloaded window): skip it.
                continue
            monthly_return_list.append(monthly_return)
            year_returns.append(monthly_return)
            output_file_1.write('%s,%s,%.6f,%.6f,%.6f\n' % (
                date, company_name, monthly_start_price, monthly_ending_price, monthly_return))
            monthly_corps[company_index].append(monthly_return)
            dates_list.append(date)

        # All twelve months visited: summarise THIS year only.
        yearly_mean_return, yearly_mean_risk = _mean_and_std(year_returns)
        yearly_corps[company_index].append(yearly_mean_return)
        output_file_2.write('%s,%s,%.6f,%.6f\n' % (
            year, company_name, yearly_mean_return, yearly_mean_risk))

    # Whole-period summary over every monthly return of the company.
    total_mean_return, total_mean_risk = _mean_and_std(monthly_return_list)
    output_file_3.write('%s,%.6f,%.6f\n' % (company_name, total_mean_return, total_mean_risk))

    # Parallel lists consumed by the risk/return scatter plot.
    global_company_name_list.append(company_name)
    global_return_list.append(total_mean_return)
    global_risk_list.append(total_mean_risk)

    if way_to_draw == 0:
        # One stand-alone figure per company.
        single_plot = full_data[['Adj Close']].plot(grid=True, figsize=(10, 8))
        single_plot.set_title(company_name)
    elif way_to_draw == 1:
        # All companies share fig1.  Subplot positions are 1-based, hence
        # company_index + 1.
        subplot = fig1.add_subplot(subplot_rows, 2, company_index + 1)
        subplot.plot(full_data.index, full_data['Adj Close'])
        fig1.subplots_adjust(left=.05, bottom=.05, right=.96, top=.9, wspace=.11, hspace=0.65)
        subplot.set_title(company_name)

print('Import process done')
# Show the figures produced so far.
plt.show()

# Risk/return scatter: one labelled point per company, risk on the x axis and
# average return on the y axis.
plt.scatter(global_risk_list, global_return_list, marker='o')
for company_label, risk_value, return_value in zip(
        global_company_name_list, global_risk_list, global_return_list):
    plt.annotate(
        company_label,
        xy=(risk_value, return_value),
        xytext=(-20, 20),
        textcoords='offset points',
        ha='left',
        va='bottom',
        bbox={'boxstyle': 'round, pad=0.66', 'fc': 'blue', 'alpha': 0.33},
        arrowprops={'arrowstyle': '->', 'connectionstyle': 'arc3, rad=0'},
    )
plt.xlabel('5 Year Total Standard Deviation')
plt.ylabel('5 Year Total Average Return')
plt.show()

# The monthly, yearly and total CSV files are complete at this point.
for finished_file in (output_file_1, output_file_2, output_file_3):
    finished_file.close()
# (Opening the result files from here via os.system was left disabled.)
""" 연도별 데이터 산출 """ | |
# Yearly table (result_4.csv): one row per company holding its yearly values
# followed by their mean and standard deviation.  The numeric part of every
# record is also collected as input for the correlation matrix.
idle_list_for_corr_mtx, idle_list_for_ret_calculation = [], []
output_file_name_4 = 'result_4.csv'
# The context manager guarantees the file is flushed and closed.
with open(output_file_name_4, 'w') as output_file_4:
    header_years = ''.join(',%s' % year_label for year_label in years_list)
    output_file_4.write('Company' + header_years + ',Avg,Std\n')
    for yearly_list in yearly_corps:
        # Each record is expected to be [company name, value, value, ...].
        company_label, yearly_values = yearly_list[0], yearly_list[1:]
        cells = ['%s' % company_label]
        cells.extend('%.6f' % value for value in yearly_values)
        cells.append('%.6f' % np.mean(yearly_values))
        cells.append('%.6f' % np.std(yearly_values))
        output_file_4.write(','.join(cells) + '\n')
        idle_list_for_corr_mtx.append(yearly_values)
"""월별 데이터 산출""" | |
# Monthly table (result_6.csv): one row per month label in dates_list and one
# column per company record in monthly_corps.
output_file_name_6 = 'result_6.csv'
with open(output_file_name_6, 'w') as output_file_6:
    # Header: the first element of every record is the company name.
    output_file_6.write('Date')
    for companies in monthly_corps:
        output_file_6.write(',%s' % companies[0])
    output_file_6.write('\n')

    for date_index, date_label in enumerate(dates_list):
        output_file_6.write('%s' % date_label)
        for companies in monthly_corps:
            value_index = date_index + 1  # +1 skips the leading name
            if value_index < len(companies):
                output_file_6.write(',%s' % companies[value_index])
            else:
                # This company has fewer monthly values than dates_list has
                # labels: leave the cell empty instead of raising IndexError.
                output_file_6.write(',')
        output_file_6.write('\n')

# Hands the CSV path to the shell as a command.
# NOTE(review): presumably relies on the operating system's file association
# to open the file -- confirm on the target platform.
os.system('%s' % output_file_name_6)
# Correlation coefficients between the per-company rows collected earlier.
corr_matrix = np.corrcoef(idle_list_for_corr_mtx)

# Heat map of the correlation matrix.
plt.imshow(corr_matrix)
plt.title('Correlation Table between 10 Corps.')
plt.colorbar()
plt.show()

# result_5.csv receives one row per simulated portfolio.  The header is
# X, Y and then one column per company name; the handle stays open because
# the simulation writes the data rows.
output_file_name_5 = 'result_5.csv'
output_file_5 = open(output_file_name_5, 'w')
company_columns = ''.join(',%s' % company_label for company_label in stock_list.keys())
output_file_5.write('X,Y' + company_columns + '\n')

plt.show()

# Containers for sampled coordinates, then switch matplotlib to interactive
# mode.
x_list = []
y_list = []
plt.ion()
def get_answer(weight_array):
    """Score one portfolio weighting.

    Args:
        weight_array: one weight per company, in the same order as the
            records of the module-level ``yearly_corps``.  Any 1-D sequence
            is accepted (it is converted with ``np.asarray``).

    Returns:
        A tuple ``(answer_vol, answer_ret)``:
        ``answer_vol`` is the quadratic form ``w . corr_matrix . w`` built
        from the module-level ``corr_matrix``; ``answer_ret`` is the sum over
        companies of ``weight * mean(record[1:])`` (element 0 of each record
        is skipped).
    """
    weights = np.asarray(weight_array)
    # NOTE(review): the risk term uses the correlation matrix; if a portfolio
    # variance is intended a covariance matrix would be expected -- confirm.
    answer_vol = weights.T.dot(corr_matrix).dot(weights)
    answer_ret = 0
    # Pair every weight with its company record instead of keeping a manual
    # index counter in step with the loop.
    for weight, company_record in zip(weights, yearly_corps):
        answer_ret = answer_ret + weight * np.mean(company_record[1:])
    return answer_vol, answer_ret
def random_sampling(sampling_number):
    """Simulate randomly weighted portfolios and plot/record each of them.

    For every sample a random weight vector (one weight per entry of
    ``stock_list``, normalised to sum to 1) is scored with ``get_answer``.
    The point is drawn on the module-level axes ``sub3`` and a row
    ``risk, return, weight, weight, ...`` is appended to the module-level
    file ``output_file_5``.

    Args:
        sampling_number: number of random portfolios to generate.

    Returns:
        None.
    """
    company_count = len(stock_list)

    # Axis decoration does not depend on the sample, so it is set once.
    plt.xlabel('Risk')
    plt.ylabel('Return ( Unit : % )')
    plt.grid(color='k', linestyle='-', linewidth=.2)

    for i in range(sampling_number):
        # Normalise the draw by ITS OWN sum so the weights add up to 1.
        # Dividing by the sum of a second, independent draw would not.
        raw_weights = np.random.random(company_count)
        random_array = raw_weights / raw_weights.sum()

        x, y = get_answer(random_array)
        # Unit option: the return is expressed in percent.
        x, y = x, 100 * y

        # Colour runs from blue (first sample) towards red (last sample).
        progress = float(i) / float(sampling_number)
        current_color = [progress, 0, 1 - progress]
        sub3.scatter(x, y, c=current_color, marker='x', linewidths=3)
        # Tiny pause so the interactive figure refreshes while sampling.
        plt.pause(0.000001)
        plt.show()
        # i + 1 samples have been drawn once sample i is on the plot.
        plt.title('Simulation with randomly generated weights' + '\n\n' +
                  'nSample = %s / %s' % (i + 1, sampling_number))

        output_file_5.write('%.6f,%.6f' % (x, y))
        for parameters in random_array:
            output_file_5.write(',%s' % parameters)
        output_file_5.write('\n')
# Figure and axes that random_sampling draws on.
fig3 = plt.figure()
sub3 = fig3.add_subplot(111)

try:
    random_sampling(500)
finally:
    # Close the simulation CSV even if the sampling loop fails.
    output_file_5.close()

# Save the simulation figure once; a second identical save adds nothing.
fig3.savefig(pic_file_name)

# Hands the picture path to the shell as a command.
# NOTE(review): presumably relies on the operating system's file association
# to open the picture -- confirm on the target platform.
os.system('%s' % pic_file_name)
print('Analysis done')
# Same purpose through the ``open`` command.
os.system('open %s' % pic_file_name)
sys.exit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment