Skip to content

Instantly share code, notes, and snippets.

View intandeay's full-sized avatar

Intan Dea Yutami intandeay

View GitHub Profile
from sklearn.metrics import pairwise
# Feature columns used to describe each actor
# One average-rating column per entry in `genres`
cols_avgRating = [genre + '_avgRating' for genre in genres]
# Role-weight columns (presumably lead vs. supporting weight -- confirm)
cols_posWt = ['leadWt', 'supWt']
# Full feature set: genre ratings first, then recency, then role weights
cols_all = [*cols_avgRating, 'recency_avg', *cols_posWt]
# Cosine similarity between every pair of rows in actorFeatures
sim = pairwise.cosine_similarity(actorFeatures.loc[:, cols_all])
selectActor = "Song Hye-Kyo"
K = 5  # number of nearest neighbours (closest actors) to look for
cols_all = cols_posWt + cols_avgRating + ['recency_avg']  # features to use

# Index label of the first actor whose name contains selectActor.
# regex=False matches the name literally, so characters such as "(" or "."
# are not interpreted as a pattern; na=False treats missing names as
# non-matches instead of putting NaN into the boolean mask.
# Raises IndexError when no actor matches.
id_ = actorFeatures.loc[
    actorFeatures['actor_name'].str.contains(selectActor, regex=False, na=False)
].index[0]
print("Actor Name:", selectActor)
print("With K-Nearest Neighbors")
# Feature values of the selected actor, reshaped to a single-row 2-D array
actor_vec = actorFeatures.loc[actorFeatures.index == id_, cols_all].values.reshape(1, -1)
# Feature rows of all remaining actors (the selected actor is excluded)
recs = actorFeatures.loc[actorFeatures.index != id_, cols_all]
## Add per-genre accumulator features for every actor:
## a drama count and a running rating total for each genre.
cr_year = 2021  # not used within the visible lines
for g in genres:
    # Both accumulators start at zero for every row
    actorFeatures[g + "_count"] = 0
    actorFeatures[g + "_totalRating"] = 0
@intandeay
intandeay / KD - Collect Information.py
Created November 15, 2021 04:54
KD - Collect Information.py
# Load the K-drama list CSV
df = pd.read_csv("../input/getting-list-of-k-dramas/kdrama.csv")
# Discard rows whose year is one of the excluded labels
df = df.loc[~df['year'].isin(['cancelled', '2022', '2021–22'])]
# Keep only the dramas that have a movie_id
df_notnull = df.loc[df['movie_id'].notna()]
# Checking & Gathering Information
# Iterate over the drama rows one at a time (loop body is outside this excerpt).
# NOTE(review): `df_n` is not defined in the visible lines; the filtered
# frame built just above is named `df_notnull` -- confirm which is intended.
for idx, dr in df_n.iterrows():
@intandeay
intandeay / KD - Collect Movie ID.py
Created November 15, 2021 04:42
KD - Collect Movie ID.py
# IMDb() handle (presumably the IMDbPY access object -- confirm)
ia = IMDb()
# Iterate for every drama name
# NOTE(review): the loop-body indentation is missing in this excerpt and the
# `try` below has no visible `except`; the remainder lies outside the view.
for idx, d in drama_list.iterrows():
title = d['title']
# Per-title result list (presumably the IMDb matches -- confirm)
found = []
try:
# Get Year: first four characters of the 'year' field, converted to int
year = int(d['year'][:4])
@intandeay
intandeay / KD - Get Drama List.py
Last active November 15, 2021 04:41
KD - Get Drama List.py
# URL to retrieve drama list
url = "https://en.wikipedia.org/wiki/List_of_South_Korean_dramas"
# Retrieve the page. The timeout keeps the script from hanging indefinitely
# on a stalled connection, and raise_for_status() stops on an HTTP error
# response instead of parsing an error page as if it were the drama list.
r = requests.get(url, timeout=30)
r.raise_for_status()
html_text = r.text
# Parse the HTML and collect every element carrying the "div-col" class
soup = BeautifulSoup(html_text, 'html.parser')
divs = soup.find_all(class_="div-col")
# Accumulator for the extracted entries (filled outside the visible lines)
drama_list = []
@intandeay
intandeay / SA_histData.py
Created October 11, 2020 22:37
Stock Analysis - Historical Data
# Price history between the two dates at a one-day bar interval.
# `start`/`end` already define the date range, so the conflicting `period`
# argument is dropped; its '1d' value is passed as the bar `interval` instead.
tickerDf1 = tickerData.history(interval='1d', start='2010-1-1', end='2020-10-1')
@intandeay
intandeay / SA_snp.py
Created October 11, 2020 22:36
Stock Analysis - S&P
# Yahoo! Finance lists the S&P 500 under the symbol ^GSPC
tickerData = yf.Ticker("^GSPC")
@intandeay
intandeay / SA_plotReturns.py
Created October 11, 2020 22:35
Stock Analysis - Plot Returns
# One horizontal bar chart per return column, in a single row of five
# subplots that share the y-axis.
fig, axes = plt.subplots(1, 5, figsize=(20, 10), sharey=True)
width = 0.75
cols = ['6MR', '1YR', '3YR', '5YR', '10YR']
# Bar labels: each row's ticker looked up in `ticker`. The labels do not
# depend on the column being plotted, so they are computed once up front.
tick = lastDate.ticker.apply(lambda t: ticker[t])
for i, j in enumerate(cols):
    ax = axes[i]
    # Bars for column j, coloured with the i-th entry of `pagoda`
    ax.barh(tick, lastDate[j], width, color=pagoda[i])
    ax.set_title(j, fontweight="bold")
    # Reverse the y-axis direction
    ax.invert_yaxis()
# Caption centred horizontally at the bottom edge of the figure
fig.text(0.5, 0, "Return (%)", ha="center", va="center", fontweight="bold")
@intandeay
intandeay / SA_updateReturn.py
Created October 11, 2020 22:34
Stock Analysis - Update Return
# Return columns to add: column name -> (unit code, count) passed to getReturn.
# Each value is computed row by row from that row's ticker, Date and Close.
_horizons = {
    '6MR': ('M', 6),
    '1YR': ('Y', 1),
    '3YR': ('Y', 3),
    '5YR': ('Y', 5),
    '10YR': ('Y', 10),
}
for _col, (_unit, _n) in _horizons.items():
    lastDate[_col] = lastDate.apply(
        lambda r: getReturn(_unit, _n, r.ticker, r.Date, r.Close), axis=1
    )