Skip to content

Instantly share code, notes, and snippets.

@chaitanyarahalkar
Last active August 20, 2022 19:01
Show Gist options
  • Save chaitanyarahalkar/23f1567563b0177bd876f629291cfad9 to your computer and use it in GitHub Desktop.
Save chaitanyarahalkar/23f1567563b0177bd876f629291cfad9 to your computer and use it in GitHub Desktop.
Apriori Algorithm
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import os
import requests
# Fetch the dataset once; later runs reuse the local copy.
if not os.path.exists("Online_Retail.xlsx"):
    # A timeout keeps the script from hanging forever on a stalled server.
    r = requests.get(
        "http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx",
        timeout=30,
    )
    # Fail loudly on an HTTP error. Otherwise the error page would be saved
    # under the .xlsx name, the existence check above would skip the download
    # on every later run, and read_excel would keep failing on a corrupt file.
    r.raise_for_status()
    with open("Online_Retail.xlsx", "wb") as f:
        f.write(r.content)
# Read the raw transaction sheet into a DataFrame.
data = pd.read_excel('Online_Retail.xlsx')

# Trim leading/trailing whitespace from the product descriptions.
data['Description'] = data['Description'].str.strip()

# Discard rows that carry no invoice number, then treat the invoice
# number as text so it can be searched with string methods.
data = data.dropna(axis=0, subset=['InvoiceNo'])
data['InvoiceNo'] = data['InvoiceNo'].astype('str')

# Keep only invoices whose number does not contain the letter "C"
# (the credit transactions).
is_credit = data['InvoiceNo'].str.contains('C')
data = data[~is_credit]
def _invoice_item_matrix(frame, country):
    """Return the invoice-by-product quantity table for one country.

    Rows of ``frame`` whose ``Country`` equals ``country`` are grouped by
    invoice and description, their quantities are summed, and the result is
    pivoted so that each row is an invoice and each column a description.
    Missing invoice/product combinations are filled with 0.
    """
    rows = frame[frame['Country'] == country]
    quantities = rows.groupby(['InvoiceNo', 'Description'])['Quantity'].sum()
    wide = quantities.unstack().reset_index()
    return wide.fillna(0).set_index('InvoiceNo')


# Transactions done in France
basket_France = _invoice_item_matrix(data, "France")
# Transactions done in the United Kingdom
basket_UK = _invoice_item_matrix(data, "United Kingdom")
# Transactions done in Portugal
basket_Por = _invoice_item_matrix(data, "Portugal")
# Transactions done in Sweden
basket_Sweden = _invoice_item_matrix(data, "Sweden")
# Defining the hot encoding function to make the data suitable
# for the concerned libraries
def hot_encode(x):
    """Convert a summed quantity into a 0/1 presence flag.

    Args:
        x: Numeric quantity of one product on one invoice.

    Returns:
        1 when ``x`` is at least 1, otherwise 0.

    The earlier two-branch version fell through and returned ``None`` for
    values strictly between 0 and 1 and for NaN, which left non-binary
    cells in the encoded matrix. Every input now maps to 0 or 1.
    """
    return 1 if x >= 1 else 0
# Encoding the datasets
# Each basket becomes a 0/1 matrix: 1 where the summed quantity is at least 1,
# 0 everywhere else. A vectorised comparison is used instead of calling
# hot_encode once per cell through DataFrame.applymap: applymap is deprecated
# in recent pandas releases and is very slow on the large UK basket.
basket_France = (basket_France >= 1).astype("int64")
basket_UK = (basket_UK >= 1).astype("int64")
basket_Por = (basket_Por >= 1).astype("int64")
basket_Sweden = (basket_Sweden >= 1).astype("int64")
# Kept for backward compatibility: this name previously ended up bound to the
# last encoded basket.
basket_encoded = basket_Sweden
# One entry per country: heading to print, encoded basket, minimum support.
_country_runs = (
    ("For France -", basket_France, 0.05),
    ("For United Kingdom", basket_UK, 0.01),
    ("For Portugal", basket_Por, 0.05),
    ("For Sweden", basket_Sweden, 0.05),
)

for heading, basket, support in _country_runs:
    print(heading)
    # Building the model: frequent itemsets at the country's support level.
    frq_items = apriori(basket, min_support=support, use_colnames=True)
    # Collecting the inferred rules (lift >= 1) in a dataframe, ordered by
    # confidence and then lift, both descending.
    rules = association_rules(
        frq_items, metric="lift", min_threshold=1
    ).sort_values(['confidence', 'lift'], ascending=False)
    print(rules.head())
numpy
pandas
requests
mlxtend
@Nadeem-Ullah
Copy link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment