🗄️ manage_by_size.py: 대형주 100개, 소형주 100개
🗄️ manage_by_year.py: 2018, 2019, 2020 연도별로 정리
import os | |
import numpy as np | |
import pandas as pd | |
from pandas import Series,DataFrame | |
# Script: split the price table into the 100 tickers with the highest mean
# value ("large cap") and the 100 with the lowest ("small cap"), and export
# each group to its own CSV under ./src/.
# NOTE(review): the ranking uses the mean of the values stored in this table;
# the file name suggests adjusted close prices rather than market
# capitalization - confirm this is the intended size criterion.

# Load the raw table.
# NOTE(review): the steps below assume one row per ticker, with the ticker
# code in the first column and one column per date - confirm against the CSV.
raw_stock = pd.read_csv("./data/stock.adj_close.csv")

# Missing values are left untouched; the fill below is disabled.
# raw_stock.fillna(0,inplace=True)

# Transpose so that every original column becomes a row.
trans_stock = raw_stock.transpose()

# The first transposed row holds the original first column (the ticker
# codes): use it as the column labels, then drop that row from the data.
trans_data = trans_stock.rename(columns=trans_stock.iloc[0])
trans_data = trans_data.drop(trans_data.index[0])
trans_date = trans_data.index[:]

# Ranking criterion: the mean of every ticker column.
trans_mean = trans_data.mean()

# Large cap 100: the 100 tickers with the highest mean, highest first.
sort_large = trans_mean.sort_values(ascending=False)
large_mean = sort_large[:100]
# Select all chosen columns in one step instead of inserting them one by one
# into an empty frame. The selection already carries the index of trans_data
# (the same labels as trans_date), so no separate set_index call is needed.
large_cap = trans_data.loc[:, large_mean.index].copy()

# Small cap 100: the 100 tickers with the lowest mean, lowest first.
sort_small = trans_mean.sort_values()
small_mean = sort_small[:100]
small_cap = trans_data.loc[:, small_mean.index].copy()

# Export Large/Small cap data to csv.
# exist_ok=True replaces the separate exists() check, so there is no window
# between checking for the directory and creating it.
os.makedirs('./src/', exist_ok=True)
large_cap.to_csv("./src/large_cap100.csv")
small_cap.to_csv("./src/small_cap100.csv")

# Missing value check (disabled)
# print(stock.isnull().sum())
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from pandas import DataFrame | |
# Script: split the price table into one CSV per year (2018, 2019, 2020),
# each holding the 'Symbol' column followed by that year's date columns.

# Needed to create the per-year output directories before writing.
import os

# Load the raw table.
raw_stock = pd.read_csv("./data/stock.adj_close.csv")

# Keep every column from '2018-06-01' to the last one (label slices are
# inclusive on both ends).
price = raw_stock.loc[:,'2018-06-01':]

# Ticker codes as a one-column frame, so they can be concatenated below.
ticker_code = pd.DataFrame(raw_stock.loc[:,'Symbol'])

# Date columns per year; the bounds are the hard-coded first and last column
# labels used for each year.
# NOTE(review): presumably these are the first/last trading days present in
# the CSV - verify if the data file is refreshed.
price_2018 = price.loc[:,'2018-06-01':'2018-12-28']
price_2019 = price.loc[:,'2019-01-02':'2019-12-30']
price_2020 = price.loc[:,'2020-01-02':'2020-09-18']

# Empty frames that carry only each year's date labels as columns.
# They are not used further in this script.
date_2018 = pd.DataFrame(columns=price_2018.columns)
date_2019 = pd.DataFrame(columns=price_2019.columns)
date_2020 = pd.DataFrame(columns=price_2020.columns)

# Combine Symbol and each year's prices side by side.
data_2018 = pd.concat([ticker_code,price_2018],axis=1)
data_2019 = pd.concat([ticker_code,price_2019],axis=1)
data_2020 = pd.concat([ticker_code,price_2020],axis=1)

# Export combined data to csv.
# The per-year directories are created first: to_csv does not create missing
# parent directories, so writing would fail on a fresh checkout.
for year_dir in ("./src/2018", "./src/2019", "./src/2020"):
    os.makedirs(year_dir, exist_ok=True)
data_2018.to_csv("./src/2018/data_2018.csv",index=False)
data_2019.to_csv("./src/2019/data_2019.csv",index=False)
data_2020.to_csv("./src/2020/data_2020.csv",index=False)