Skip to content

Instantly share code, notes, and snippets.

@tanpengshi
Created November 16, 2020 12:42
Show Gist options
  • Save tanpengshi/8ed3fb08d185abc590806c6789229529 to your computer and use it in GitHub Desktop.
Save tanpengshi/8ed3fb08d185abc590806c6789229529 to your computer and use it in GitHub Desktop.
product_cat = X_train[['product_id','price_category','category_code','brand']].drop_duplicates('product_id')
product_cat = product_cat.sort_values(by='product_id')
price_cat_matrix = np.reciprocal(euclidean_distances(np.array(product_cat['price_category']).reshape(-1,1))+1)
euclidean_matrix = pd.DataFrame(price_cat_matrix,columns=product_cat['product_id'],index=product_cat['product_id'])
tfidf_vectorizer = TfidfVectorizer()
doc_term = tfidf_vectorizer.fit_transform(list(product_cat['category_code']))
dt_matrix = pd.DataFrame(doc_term.toarray().round(3), index=[i for i in product_cat['product_id']], columns=tfidf_vectorizer.get_feature_names())
cos_similar_matrix = pd.DataFrame(cosine_similarity(dt_matrix.values),columns=product_cat['product_id'],index=product_cat['product_id'])
tfidf_vectorizer = TfidfVectorizer()
doc_term = tfidf_vectorizer.fit_transform(list(product_cat['brand']))
dt_matrix1 = pd.DataFrame(doc_term.toarray().round(3), index=[i for i in product_cat['product_id']], columns=tfidf_vectorizer.get_feature_names())
dt_matrix1 = dt_matrix1 + 0.01
cos_similar_matrix1 = pd.DataFrame(cosine_similarity(dt_matrix1.values),columns=product_cat['product_id'],index=product_cat['product_id'])
similarity_matrix = cos_similar_matrix.multiply(euclidean_matrix).multiply(cos_similar_matrix1)
content_matrix = X_train_matrix.dot(similar_matrix)
std = MinMaxScaler(feature_range=(0, 1))
std.fit(content_matrix.values)
content_matrix = std.transform(content_matrix.values)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment