Skip to content

Instantly share code, notes, and snippets.

@Lakshmi-1212
Lakshmi-1212 / LP_prob.py
Created February 6, 2021 13:07
Print the LP problem
# Print `prob` (presumably the LP problem object built earlier; its definition is not shown in this snippet)
print(prob)
@Lakshmi-1212
Lakshmi-1212 / RandomForestClassifier.py
Created February 21, 2021 11:22
Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, r2_score
# Create a random forest classifier and fit it on the training data
rfmodel = RandomForestClassifier().fit(X_train, y_train)
# Predict the class labels for the test dataset
y_pred_rf = rfmodel.predict(X_test)
# Score the classifier with accuracy (fraction of correctly predicted labels).
# r2_score is a regression metric and is not meaningful for class labels;
# its import is kept only so later code relying on it keeps working.
accuracy_score(y_test, y_pred_rf)
@Lakshmi-1212
Lakshmi-1212 / LimeExplainer.py
Created February 21, 2021 11:31
Create the Lime Explainer
from lime.lime_tabular import LimeTabularExplainer
# Create the Lime Explainer for tabular data.
# Class names are taken from rfmodel, the same model whose probabilities
# are explained below, so labels and probability columns line up.
explainer = LimeTabularExplainer(
    X_train.values,
    mode='classification',
    feature_names=X_train.columns,
    class_names=rfmodel.classes_,
)
# Explain the test row at positional index 1
observation = 1


# Set the predict function for the model
def predict_fn(x):
    """Return rfmodel's class probabilities for x, cast to float."""
    return rfmodel.predict_proba(x).astype(float)


# Explain the prediction, using the explainer and predict function defined above
lime_explanation = explainer.explain_instance(X_test.values[observation], predict_fn, num_features=10)
# Show the explanation in the notebook
lime_explanation.show_in_notebook(show_table=True)
@Lakshmi-1212
Lakshmi-1212 / batsman.py
Created May 30, 2021 15:27
Batsman Statistics
# Group the ball-by-ball rows by batsman
batgroup = balldf.groupby(['batsman'])
# Create a batting dataframe with summary statistics for each batsman
batdf = pd.DataFrame(batgroup['ball'].count()).rename(columns={'ball':'balls_faced'})
# Number of distinct 'id' values per batsman (presumably match ids; confirm against the data)
batdf['innings'] = batgroup['id'].nunique()
batdf['runs'] = batgroup['batsman_runs'].sum()
# Count of balls on which the batsman scored exactly 4 / exactly 6 runs.
# Batsmen with no such ball are missing from the grouped counts and come
# back as NaN after index alignment, so they are filled with 0.
# The result is assigned back instead of using a chained inplace fillna,
# which is deprecated and has no effect under pandas Copy-on-Write.
batdf['4s'] = balldf[balldf['batsman_runs'] == 4].groupby('batsman')['batsman_runs'].count()
batdf['4s'] = batdf['4s'].fillna(0)
batdf['6s'] = balldf[balldf['batsman_runs'] == 6].groupby('batsman')['batsman_runs'].count()
batdf['6s'] = batdf['6s'].fillna(0)
@Lakshmi-1212
Lakshmi-1212 / bowlers.py
Created May 30, 2021 15:28
Bowling Statistics
# Group the ball-by-ball rows by bowler
bowlgroup = balldf.groupby(['bowler'])
# Create a bowling dataframe (bowldf) with summary statistics for each bowler
bowldf = pd.DataFrame(bowlgroup['ball'].count()).rename(columns={'ball':'balls_bowled'})
# Get no. of wickets taken by each bowler: only the dismissal kinds listed
# here are counted
bwl_wkts = balldf[balldf['dismissal_kind'].isin(['caught','bowled', 'lbw','stumped', 'caught and bowled', 'hit wicket'])]
bowldf['wickets'] = bwl_wkts.groupby(['bowler'])['ball'].count()
# Bowlers without any counted wicket are NaN after index alignment; fill with 0.
# The result is assigned back instead of using a chained inplace fillna,
# which is deprecated and has no effect under pandas Copy-on-Write.
bowldf['wickets'] = bowldf['wickets'].fillna(0)
@Lakshmi-1212
Lakshmi-1212 / all_players.py
Last active June 6, 2021 14:01
All players list
# Build a dataframe listing every player, indexed by player name.
# update_player_list is called once per row of balldf; it is defined elsewhere
# and presumably fills all_players_dict as a side effect (confirm there).
all_players_dict = {}
out_temp = balldf.apply(update_player_list, axis=1)
all_df = pd.DataFrame({'Players': list(all_players_dict)})
# 'matches' is the length of the collection stored for each player
all_df['matches'] = all_df['Players'].apply(lambda name: len(all_players_dict[name]))
all_df = all_df.set_index('Players')
@Lakshmi-1212
Lakshmi-1212 / merge_data.py
Last active June 5, 2021 18:46
Merge data and add general statistics
# Combine the batting and bowling dataframes to create a merged players dataframe.
# Outer joins on the index keep every player present in any of the frames.
players = pd.merge(all_df, batdf, left_index=True, right_index=True, how='outer')
players = pd.merge(players, bowldf, left_index=True, right_index=True, how='outer')
# Number of man of the matches from matches file.
# value_counts() names its result 'count' on pandas >= 2.0 (and
# 'player_of_match' on older versions), so the Series is renamed explicitly
# to give the merged column the same name on every version.
players = pd.merge(
    players,
    matches['player_of_match'].value_counts().rename('player_of_match'),
    left_index=True,
    right_index=True,
    how='left',
)
# Players with no award have no match in the left join (NaN); fill with 0
players['player_of_match'] = players['player_of_match'].fillna(0)
@Lakshmi-1212
Lakshmi-1212 / kmeans.py
Created June 1, 2021 05:32
Apply the k-means algorithm
from sklearn.cluster import KMeans
# Create the KMeans object for 5 clusters and at most 50 iterations
kmeans = KMeans(n_clusters=5, max_iter=50)
# Fit the k-means algorithm on the given scaled players data
kmeans.fit(players_scaled)
# Assign the cluster labels to a new "cluster_kmeans" column of the original
# players dataframe (the column key must be a string, not a bare name)
players["cluster_kmeans"] = kmeans.labels_
@Lakshmi-1212
Lakshmi-1212 / kmeans_clustering.py
Last active June 2, 2021 04:52
K-Means Clustering
from sklearn.cluster import KMeans
# Build a KMeans estimator for 4 clusters (at most 50 iterations) and fit it
# on the scaled players data; fit() returns the estimator itself
kmeans = KMeans(n_clusters=4, max_iter=50).fit(players_scaled)
# Store each row's cluster label in the original players dataframe
players["cluster_kmeans"] = kmeans.labels_