Intan Dea Yutami intandeay

## KD - Cosine Sim.py
from sklearn.metrics import pairwise

# Feature columns used for the similarity computation, in this order:
# the per-genre '<genre>_avgRating' columns, 'recency_avg', then 'leadWt'/'supWt'.
cols_avgRating = [f"{g}_avgRating" for g in genres]
cols_posWt = ['leadWt', 'supWt']
cols_all = [*cols_avgRating, 'recency_avg', *cols_posWt]

# Cosine similarity between every pair of actors (rows of actorFeatures)
feature_matrix = actorFeatures[cols_all]
sim = pairwise.cosine_similarity(feature_matrix)

## KD - Nearest Nbors.py
selectActor = "Song Hye-Kyo"
K = 5  # number of closest actors (nearest neighbours) to look for
cols_all = cols_posWt + cols_avgRating + ['recency_avg']  # features to use

# Index label of the first actor whose name contains `selectActor`.
# regex=False: the name is matched literally, so characters such as "." or "("
#   inside a name are not interpreted as a regular-expression pattern.
# na=False: rows with a missing actor_name count as "no match" instead of
#   putting NaN into the boolean mask, which .loc refuses to index with.
name_mask = actorFeatures['actor_name'].str.contains(selectActor, regex=False, na=False)
id_ = actorFeatures.loc[name_mask].index[0]

print("Actor Name:", selectActor)
print("With K-Nearest Neighbors")

# Feature values of the selected actor, reshaped to a single row
actor_vec = actorFeatures.loc[actorFeatures.index == id_, cols_all].values.reshape(1, -1)
# Feature rows of all remaining actors (the selected actor's row is dropped)
recs = actorFeatures.drop(actorFeatures[actorFeatures.index == id_].index, axis=0)[cols_all]

## KD - Feature.py
## Add two accumulator columns per genre to the actor table:
##   <genre>_count       - number of dramas in that genre
##   <genre>_totalRating - accumulated rating over those dramas
## Every column starts at 0.

cr_year = 2021  # NOTE(review): presumably the reference ("current") year - confirm

for g in genres:
    for suffix in ("_count", "_totalRating"):
        actorFeatures[g + suffix] = 0


## KD - Collect Information.py
df = pd.read_csv("../input/getting-list-of-k-dramas/kdrama.csv")

# Exclude rows whose 'year' is one of the unwanted values
excluded_years = ['cancelled', '2022', '2021–22']
keep_row = ~df['year'].isin(excluded_years)
df = df[keep_row]

# Keep only the dramas that have a movie_id
has_movie_id = df['movie_id'].notnull()
df_notnull = df[has_movie_id]

# Checking & Gathering Information
# NOTE(review): `df_n` is not defined anywhere in the visible code; the filtered
# frame created just above is named `df_notnull` - confirm which name is intended.
# The loop body is not part of this excerpt.
for idx, dr in df_n.iterrows():

## KD - Collect Movie ID.py
# NOTE(review): IMDb is presumably the IMDbPY/Cinemagoer client class; its import
# is not visible in this excerpt - confirm.
ia = IMDb()

# Iterate over every row of drama_list (one drama per row)
for idx, d in drama_list.iterrows():
    title = d['title']
    found = []

    try:
        # Get Year: first four characters of the 'year' field, converted to int
        year = int(d['year'][:4])

## KD - Get Drama List.py
# URL to retrieve drama list
url = "https://en.wikipedia.org/wiki/List_of_South_Korean_dramas"

# Retrieve drama names.
# The timeout keeps the script from waiting forever on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
html_text = r.text
soup = BeautifulSoup(html_text, 'html.parser')
divs = soup.find_all(class_="div-col")

drama_list = []

## SA_histData.py
# Daily price history for the given date range. The bar size is selected with
# `interval`; `period` is an alternative to an explicit start/end range, so it is
# not passed together with them.
tickerDf1 = tickerData.history(interval='1d', start='2010-1-1', end='2020-10-1')

## SA_snp.py
# Yahoo! Finance lists the S&P 500 under the symbol ^GSPC
sp500_symbol = '^GSPC'
tickerData = yf.Ticker(sp500_symbol)

## SA_plotReturns.py
# One horizontal bar chart per return column, laid out in a single row of five
# panels that share the y axis.
fig, axes = plt.subplots(1, 5, figsize=(20, 10), sharey=True)
width = 0.75
cols = ['6MR', '1YR', '3YR', '5YR', '10YR']

# The y labels do not depend on the column being drawn, so they are looked up
# once instead of re-running the apply on every loop iteration.
tick = lastDate.ticker.apply(lambda t: ticker[t])

for i, j in enumerate(cols):
    ax = axes[i]
    ax.barh(tick, lastDate[j], width, color=pagoda[i])
    ax.set_title(j, fontweight="bold")
    # NOTE(review): with sharey=True each call probably flips the shared y axis
    # for all panels, so the final orientation depends on the number of panels
    # being odd - confirm.
    ax.invert_yaxis()

# Common caption, horizontally centred at the bottom edge of the figure
fig.text(0.5, 0, "Return (%)", ha="center", va="center", fontweight="bold")

## SA_updateReturn.py
# Return columns to add: column name -> (unit code, count), which are handed to
# getReturn as its first two arguments.
return_windows = {
    '6MR': ('M', 6),
    '1YR': ('Y', 1),
    '3YR': ('Y', 3),
    '5YR': ('Y', 5),
    '10YR': ('Y', 10),
}
for ret_col, (ret_unit, ret_span) in return_windows.items():
    lastDate[ret_col] = lastDate.apply(
        lambda r: getReturn(ret_unit, ret_span, r.ticker, r.Date, r.Close),
        axis=1,
    )
	from sklearn.metrics import pairwise

	# Feature columns used for the similarity computation, in this order:
	# the per-genre '<genre>_avgRating' columns, 'recency_avg', then 'leadWt'/'supWt'.
	cols_avgRating = [f"{g}_avgRating" for g in genres]
	cols_posWt = ['leadWt', 'supWt']
	cols_all = [*cols_avgRating, 'recency_avg', *cols_posWt]

	# Cosine similarity between every pair of actors (rows of actorFeatures)
	feature_matrix = actorFeatures[cols_all]
	sim = pairwise.cosine_similarity(feature_matrix)
	selectActor = "Song Hye-Kyo"
	K = 5  # number of closest actors (nearest neighbours) to look for
	cols_all = cols_posWt + cols_avgRating + ['recency_avg']  # features to use

	# Index label of the first actor whose name contains `selectActor`.
	# regex=False: the name is matched literally, not as a regular expression.
	# na=False: a missing actor_name counts as "no match" instead of putting NaN
	#   into the boolean mask, which .loc refuses to index with.
	name_mask = actorFeatures['actor_name'].str.contains(selectActor, regex=False, na=False)
	id_ = actorFeatures.loc[name_mask].index[0]

	print("Actor Name:", selectActor)
	print("With K-Nearest Neighbors")

	# Feature values of the selected actor, reshaped to a single row
	actor_vec = actorFeatures.loc[actorFeatures.index == id_, cols_all].values.reshape(1, -1)
	# Feature rows of all remaining actors (the selected actor's row is dropped)
	recs = actorFeatures.drop(actorFeatures[actorFeatures.index == id_].index, axis=0)[cols_all]
	## Add two accumulator columns per genre to the actor table:
	##   <genre>_count       - number of dramas in that genre
	##   <genre>_totalRating - accumulated rating over those dramas
	## Every column starts at 0.

	cr_year = 2021  # NOTE(review): presumably the reference ("current") year - confirm

	# The two assignments form the loop body; they are indented one level below
	# the `for` header so the loop is well-formed.
	for g in genres:
		actorFeatures[g + "_count"] = 0
		actorFeatures[g + "_totalRating"] = 0
	df = pd.read_csv("../input/getting-list-of-k-dramas/kdrama.csv")

	# Exclude rows whose 'year' is one of the unwanted values
	excluded_years = ['cancelled', '2022', '2021–22']
	keep_row = ~df['year'].isin(excluded_years)
	df = df[keep_row]

	# Keep only the dramas that have a movie_id
	has_movie_id = df['movie_id'].notnull()
	df_notnull = df[has_movie_id]

	# Checking & Gathering Information
	# NOTE(review): `df_n` is not defined anywhere in the visible code; the filtered
	# frame created just above is named `df_notnull` - confirm which name is intended.
	# The loop body is not part of this excerpt.
	for idx, dr in df_n.iterrows():
	# NOTE(review): IMDb is presumably the IMDbPY/Cinemagoer client class; its import
	# is not visible in this excerpt - confirm.
	# NOTE(review): the statements after the `for` and `try` headers below sit at the
	# same indentation as the headers, so the nesting has been lost in this copy.
	ia = IMDb()

	# Iterate over every row of drama_list (one drama per row)
	for idx, d in drama_list.iterrows():
	title = d['title']
	found = []

	try:
	# Get Year: first four characters of the 'year' field, converted to int
	year = int(d['year'][:4])
	# URL to retrieve drama list
	url = "https://en.wikipedia.org/wiki/List_of_South_Korean_dramas"

	# Retrieve drama names.
	# The timeout keeps the script from waiting forever on a stalled connection, and
	# raise_for_status() stops on an HTTP error instead of parsing an error page.
	r = requests.get(url, timeout=30)
	r.raise_for_status()
	html_text = r.text
	soup = BeautifulSoup(html_text, 'html.parser')
	divs = soup.find_all(class_="div-col")

	drama_list = []
	# Yahoo! Finance lists the S&P 500 under the symbol ^GSPC
	sp500_symbol = '^GSPC'
	tickerData = yf.Ticker(sp500_symbol)
	# One horizontal bar chart per return column, laid out in a single row of five
	# panels that share the y axis.
	fig, axes = plt.subplots(1, 5, figsize=(20, 10), sharey=True)
	width = 0.75
	cols = ['6MR', '1YR', '3YR', '5YR', '10YR']

	# The y labels do not depend on the column being drawn, so they are looked up
	# once instead of re-running the apply on every loop iteration.
	tick = lastDate.ticker.apply(lambda t: ticker[t])

	# The per-panel statements are indented below the `for` header so that they
	# form the loop body.
	for i, j in enumerate(cols):
		ax = axes[i]
		ax.barh(tick, lastDate[j], width, color=pagoda[i])
		ax.set_title(j, fontweight="bold")
		# NOTE(review): with sharey=True each call probably flips the shared y
		# axis for all panels, so the final orientation depends on the number of
		# panels being odd - confirm.
		ax.invert_yaxis()

	# Common caption, horizontally centred at the bottom edge of the figure
	fig.text(0.5, 0, "Return (%)", ha="center", va="center", fontweight="bold")
	# Return columns to add: column name -> (unit code, count), which are handed to
	# getReturn as its first two arguments.
	return_windows = {
		'6MR': ('M', 6),
		'1YR': ('Y', 1),
		'3YR': ('Y', 3),
		'5YR': ('Y', 5),
		'10YR': ('Y', 10),
	}
	for ret_col, (ret_unit, ret_span) in return_windows.items():
		lastDate[ret_col] = lastDate.apply(
			lambda r: getReturn(ret_unit, ret_span, r.ticker, r.Date, r.Close),
			axis=1,
		)