Skip to content

Instantly share code, notes, and snippets.

@fhsi1
Last active April 26, 2021 08:23
Show Gist options
  • Save fhsi1/ed9010c6065c5ff456af0da5dc1343e9 to your computer and use it in GitHub Desktop.
Save fhsi1/ed9010c6065c5ff456af0da5dc1343e9 to your computer and use it in GitHub Desktop.
Free/Pre Onboarding Course

✔️ 일자별 종목별 주가를 나타내는 csv 파일 가공하기

🗄️ manage_by_size.py 대형주 100개, 소형주 100개

🗄️ manage_by_year.py 2018, 2019, 2020 연도별로 정리

import os
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Split the adjusted-close table into the 100 tickers with the highest mean
# value and the 100 with the lowest, and export each group to its own CSV.
#
# The reshaping below treats the first CSV column as the ticker code and every
# remaining column as one date (assumption read off the transpose/rename
# steps -- confirm against the actual data file).
raw_stock = pd.read_csv("./data/stock.adj_close.csv")

# Missing values are left as NaN (no fill step is applied).

# Transpose so that each row is a date and each column is a ticker.
trans_stock = raw_stock.transpose()

# The first transposed row carries the ticker codes: promote it to the column
# labels, then drop that row from the data.
trans_data = trans_stock.rename(columns=trans_stock.iloc[0])
trans_data = trans_data.drop(trans_data.index[0])
trans_date = trans_data.index[:]

# Ranking criterion: the per-ticker mean over every remaining row. This mean
# is what the script uses to decide "large" versus "small".
trans_mean = trans_data.mean()

# Large cap 100: the 100 tickers with the highest mean.
sort_large = trans_mean.sort_values(ascending=False)
large_mean = sort_large.iloc[:100]
# Select all chosen columns in one step instead of inserting them one by one
# into an empty frame; the date index of trans_data is carried over as-is.
large_cap = trans_data.loc[:, large_mean.index].copy()

# Small cap 100: the 100 tickers with the lowest mean.
sort_small = trans_mean.sort_values()
small_mean = sort_small.iloc[:100]
small_cap = trans_data.loc[:, small_mean.index].copy()

# Export both groups. exist_ok=True avoids the check-then-create race of a
# separate os.path.exists() test.
os.makedirs('./src/', exist_ok=True)
large_cap.to_csv("./src/large_cap100.csv")
small_cap.to_csv("./src/small_cap100.csv")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import DataFrame
# Split the adjusted-close table into one CSV per calendar year (2018-2020),
# each holding the Symbol column followed by that year's date columns.

# Imported here because this script's own import block does not bring in `os`,
# which is needed to create the output directories below.
import os

raw_stock = pd.read_csv("./data/stock.adj_close.csv")

# Keep only the columns from the '2018-06-01' label onwards.
price = raw_stock.loc[:, '2018-06-01':]

# Ticker codes, kept as a one-column frame so they can be concatenated below.
ticker_code = pd.DataFrame(raw_stock.loc[:, 'Symbol'])

# Per-year column ranges; .loc label slices include both end labels.
price_2018 = price.loc[:, '2018-06-01':'2018-12-28']
price_2019 = price.loc[:, '2019-01-02':'2019-12-30']
price_2020 = price.loc[:, '2020-01-02':'2020-09-18']

# Empty frames that carry only each year's date labels as columns.
# Nothing in the visible script reads them; kept for compatibility.
date_2018 = pd.DataFrame(columns=price_2018.columns)
date_2019 = pd.DataFrame(columns=price_2019.columns)
date_2020 = pd.DataFrame(columns=price_2020.columns)

# Put the Symbol column in front of each year's columns.
data_2018 = pd.concat([ticker_code, price_2018], axis=1)
data_2019 = pd.concat([ticker_code, price_2019], axis=1)
data_2020 = pd.concat([ticker_code, price_2020], axis=1)

# Export one CSV per year. to_csv does not create missing directories, so
# make each parent directory first; otherwise a fresh checkout fails with
# OSError on the first write.
_exports = (
    (data_2018, "./src/2018/data_2018.csv"),
    (data_2019, "./src/2019/data_2019.csv"),
    (data_2020, "./src/2020/data_2020.csv"),
)
for _frame, _path in _exports:
    os.makedirs(os.path.dirname(_path), exist_ok=True)
    _frame.to_csv(_path, index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment