Skip to content

Instantly share code, notes, and snippets.

  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save velotiotech/0439abc78d01883280d7c5f4a5033553 to your computer and use it in GitHub Desktop.
Recommendation system application built on a medical drug review dataset. The dataset was used for the Winter 2018 Kaggle University Club Hackathon and is now publicly available.
#!/usr/bin/env python
# coding: utf-8
# Step 1
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
# Step 2: load the reviews and build a drug x condition rating matrix.
# Missing cells are replaced with the string 'NA' so that every 'condition'
# value is a string before it is label-encoded.
df = pd.read_csv('drugsComTest_raw.csv').fillna('NA')
# Map each distinct condition string to an integer id.
df['condition_id'] = pd.Series(encoder.fit_transform(df['condition'].values), index=df.index)
df_medical = df.filter(['drugName', 'condition', 'rating', 'condition_id'], axis=1)
# NOTE(review): the statement defining df_medical_ratings_pivot was missing
# from this listing, so the script failed with NameError. It is reconstructed
# here as one row per drug and one column per condition id, holding the mean
# rating (0 where a drug has no review for that condition) -- confirm that
# this matches the intended layout.
df_medical_ratings_pivot = df_medical.pivot_table(
    index='drugName', columns='condition_id', values='rating'
).fillna(0)
# Sparse copy of the pivot values, used to fit the model.
df_medical_ratings_pivot_matrix = csr_matrix(df_medical_ratings_pivot.values)
# Step 3: configure and fit the nearest-neighbour model.
# distance = ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan']
# algorithm = ['auto', 'ball_tree', 'kd_tree', 'brute']
# NOTE(review): the original list also named 'cuml', which is not a
# scikit-learn `algorithm` value (presumably a reference to RAPIDS cuML).
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
# The model has to be fitted before kneighbors() can be queried.
model_knn.fit(df_medical_ratings_pivot_matrix)
# Step 4: pick one pivot row at random and shape it as a single query sample.
sample_index = np.random.choice(df_medical_ratings_pivot.shape[0])
sample_condition = df_medical_ratings_pivot.iloc[sample_index, :].values.reshape(1, -1)
# Step 5: look up the nearest rows and print them.
# kneighbors() rejects n_neighbors larger than the number of fitted rows, so
# cap the request for very small datasets.
n_neighbors = min(6, df_medical_ratings_pivot.shape[0])
distances, indices = model_knn.kneighbors(sample_condition, n_neighbors=n_neighbors)
print('Recommendations for {0}:\n'.format(df_medical_ratings_pivot.index[sample_index]))
# Entry 0 is typically the sampled row itself (distance 0), although rows with
# an identical rating direction tie at distance 0 as well.
for i, (neighbor_index, distanceFromSample) in enumerate(zip(indices.flatten(), distances.flatten())):
    recommendation = df_medical_ratings_pivot.index[neighbor_index]
    print('{0}: {1}, with distance of {2}:'.format(i, recommendation, distanceFromSample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment