Skip to content

Instantly share code, notes, and snippets.

View Abuton's full-sized avatar
💭
A better me everyday

Abubakar Alaro Abuton

💭
A better me everyday
  • Kitopi, AFarms
  • Ilorin, Kwara State
  • X @Abuton1
View GitHub Profile
@Abuton
Abuton / Assignment.ipynb
Created January 5, 2021 11:04
Final assignment on scalable ML using PySpark
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@Abuton
Abuton / convert_arr_series_2_dataframe.py
Created February 20, 2021 21:06
Convert a Series of arrays into a pandas DataFrame, expanding a single column into multiple columns.
import pandas as pd
# Spread every array held in the 'arrival_delay_deciles' column across separate
# columns (one per array position), then relabel position k as "<k*10>%".
# NOTE(review): `df` must already exist in the calling scope; it is not
# defined in this snippet.
percentiles = (
    df['arrival_delay_deciles']
    .apply(pd.Series)
    .rename(columns=lambda position: '{0}%'.format(position * 10))
)
# Preview the first rows (only displays output in a notebook/REPL).
percentiles.head()
# function to calculate feature engineering by aggregating integer columns
def get_agg_by_cols(data, col, agg_list, agg_cols):
"""
data : dataframe to calculate FE on
col : list of columns to aggregate by (the object datatype)
agg_list : a list of statistical measure e.g mean, median
agg_cols : a list of numerical datatype columns
"""
for cols in col:
for i in agg_list:
from sklearn.model_selection import KFold, StratifiedKFold
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score
kfold, scores, y_pred_totcb = StratifiedKFold(n_splits=5, shuffle=True, random_state=221), list(), []
for train, test in kfold.split(X, y):
x_train, x_test = X.iloc[train], X.iloc[test]
y_train, y_test = y.iloc[train], y.iloc[test]
model = CatBoostClassifier(random_state=27, n_estimators=3000, cat_features = cat_columns,
max_depth=7, verbose=500, learning_rate=0.102, eval_metric='AUC')
@Abuton
Abuton / remove_system_generated_msgs.py
Last active December 28, 2021 20:01
Remove system-generated messages such as "added", "removed", "left", "joined using this", and "message was deleted".
def remove_system_generated_msgs(uploaded_file: str) -> list:
"""Remove system generated messages like::
1. +234 was added
2. +234 left etc
uploaded_file: str: path of dataset
Returns:
--------
list
@Abuton
Abuton / remove_emoji.py
Last active September 25, 2021 09:53
Remove emojis that I don't think I need from chats
#removing emojis
def remove_emoji(text: str) -> str:
"""
remove emojis from chats i think idont need
args:
----
text: a single list of message
Returns:
@Abuton
Abuton / process_text.py
Last active December 29, 2021 11:40
Process the uploaded chat data by removing unwanted entries and return a DataFrame
import pandas as pd
def processChatMsgs(wh_chat: list) -> pd.DataFrame:
"""
process the uploaded chat data by removing unwanted
entries and returns a dataframe
args:
-----
wh_chat: whatsapp chat data
def get_total_msg(user:str, df:pd.DataFrame) -> str:
"""
Return the total msgs 'user' has sent
Args:
----
user: the user to extract his/her total count of messages
df: pd.DataFrame: dataframe that stores information about each user and message sent
Returns:
--------
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
from wordcloud import WordCloud
def draw_wordcloud(msgs: list):
"""
Draw wordcloud for visualization of the most used words
during conversation
args:
-----
def pie_chart(user):
fig, ax = plt.subplots(figsize=(15, 8))
explodex = []
for i in np.arange(len(user)):
explodex.append(0.005)
ax = user.plot(kind='pie', colors=['red', 'green', 'cyan', 'lime', 'gold'],
fontsize=12, autopct='%1.1f%%', startangle=180,
pctdistance=0.85, explode=explodex)
inner_circle = plt.Circle((0,0), 0.50, fc='white')
fig = plt.gcf()