Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Recommendation system application built on a medical drug review dataset. The dataset was used for the Winter 2018 Kaggle University Club Hackathon and is now publicly available.
#!/usr/bin/env python
# coding: utf-8
# Step 1
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder
# Encoder used to turn the free-text condition names into integer ids.
encoder = LabelEncoder()

# Step 2: load the reviews and build the rating matrix.
# Missing cells are replaced with the literal string 'NA' so that a missing
# condition becomes its own category for the label encoder.
df = pd.read_csv('drugsComTest_raw.csv').fillna('NA')
df['condition_id'] = pd.Series(encoder.fit_transform(df['condition'].values), index=df.index)
df_medical = df.filter(['drugName', 'condition', 'rating', 'condition_id'], axis=1)

# NOTE(review): the statement that defined `df_medical_ratings_pivot` was
# missing, so every later reference raised NameError. It is reconstructed here
# as one row per drug and one column per condition id, holding the mean rating
# (0 where a drug has no review for a condition). The orientation is an
# assumption -- confirm that drugs, not conditions, are what should be
# recommended.
df_medical_ratings_pivot = df_medical.pivot_table(
    index='drugName',
    columns='condition_id',
    values='rating',
    aggfunc='mean',
).fillna(0)
df_medical_ratings_pivot_matrix = csr_matrix(df_medical_ratings_pivot.values)

# Step 3: build and fit the nearest-neighbour model.
# distance = ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan']
# algorithm = ['auto', 'ball_tree', 'kd_tree', 'brute']
# (the original note also listed 'cuml', which scikit-learn does not accept)
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
# The model must be fitted before kneighbors() can be called; the original
# script skipped this step and failed with NotFittedError.
model_knn.fit(df_medical_ratings_pivot_matrix)

# Step 4: pick a random row of the pivot table as the query.
sample_index = np.random.choice(df_medical_ratings_pivot.shape[0])
# kneighbors() expects a 2-D array, hence the reshape to a single-row matrix.
sample_condition = df_medical_ratings_pivot.iloc[sample_index, :].values.reshape(1, -1)

# Step 5: look up the 6 nearest rows and print them, closest first.
distances, indices = model_knn.kneighbors(sample_condition, n_neighbors=6)
print('Recommendations for {0}:\n'.format(df_medical_ratings_pivot.index[sample_index]))
# Because the query row is part of the fitted data, rank 0 is normally the
# sample itself at distance 0.
for i, (neighbor_position, distanceFromSample) in enumerate(
        zip(indices.flatten(), distances.flatten())):
    recommendation = df_medical_ratings_pivot.index[neighbor_position]
    print('{0}: {1}, with distance of {2}:'.format(i, recommendation, distanceFromSample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment