-
-
Save jmayse/7c76a2d838ac164b923a47b29527f2ed to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3.7.3 (default, Mar 27 2019, 09:23:15)
Type 'copyright', 'credits' or 'license' for more information
IPython 7.3.0 -- An enhanced Interactive Python. Type '?' for help.
PyDev console: using IPython 7.3.0
Python 3.7.3 (default, Mar 27 2019, 09:23:15)
[Clang 10.0.1 (clang-1001.0.46.3)] on darwin
from timeit import repeat

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns.apriori import apriori
from mlxtend.frequent_patterns.fpgrowth import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Benchmark fpgrowth against apriori on one ticket file, then check that both
# algorithms report the same itemsets with the same support values.
CSV_PATH = '/Projects/drink_recommendation_engine/sample_2.csv'
BENCH_MIN_SUPPORT = 0.01  # threshold used for the timed calls
CHECK_MIN_SUPPORT = 0.01  # threshold used for the equivalence check
NUMBER = 1  # calls per timing sample (timeit `number`)
REPEAT = 5  # timing samples collected (timeit `repeat`)

# Collect the item names of every ticket into one tuple per ticket_id and
# encode them; rows are reported as transactions and columns as unique items.
df = pd.read_csv(CSV_PATH, sep='\t')
te = TransactionEncoder()
_items = te.fit_transform(df.groupby(['ticket_id'])['item_name'].apply(lambda x: tuple(x)))
items = pd.DataFrame(_items, columns=te.columns_)

# Time both algorithms on the same input. The statement string is evaluated
# against this module's globals, so it can see `items` and BENCH_MIN_SUPPORT.
# Each entry of `runs` is the total time for NUMBER calls.
for algorithm in ("fpgrowth", "apriori"):
    print(f"{algorithm}: {items.shape[0]} transactions, {items.shape[1]} unique items")
    runs = repeat(
        f"{algorithm}(items, min_support=BENCH_MIN_SUPPORT)",
        globals=globals(),
        number=NUMBER,
        repeat=REPEAT,
    )
    print(f"fastest run: {min(runs)}, slowest run: {max(runs)}, mean: {np.mean(runs)}")

# Equivalence check: both algorithms must agree on itemsets and support.
df1 = fpgrowth(items, min_support=CHECK_MIN_SUPPORT)
df2 = apriori(items, min_support=CHECK_MIN_SUPPORT)
df3 = pd.merge(df1, df2, left_on=['itemsets'], right_on=['itemsets'])

# merge defaults to an inner join, which silently drops itemsets found by only
# one algorithm; comparing row counts catches that case. This presumes each
# result lists an itemset at most once.
if not len(df1) == len(df2) == len(df3):
    raise AssertionError(
        "itemsets differ: fpgrowth found {}, apriori found {}, {} in common".format(
            len(df1), len(df2), len(df3)
        )
    )

# Give both columns the same name so only the values are compared.
s1 = df3['support_x'].rename('support')
s2 = df3['support_y'].rename('support')
# assert_series_equal raises AssertionError on mismatch; reaching the print
# means the check passed.
pd.testing.assert_series_equal(s1, s2)
print("Assertion passed")
fpgrowth: 8676 transactions, 966 unique items
fastest run: 0.24607628100000056, slowest run: 0.2710467320000012, mean: 0.25534826600000055
apriori: 8676 transactions, 966 unique items
fastest run: 0.8877421420000005, slowest run: 0.9147161260000001, mean: 0.8998771082000001
Assertion passed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3.7.3 (default, Mar 27 2019, 09:23:15)
Type 'copyright', 'credits' or 'license' for more information
IPython 7.3.0 -- An enhanced Interactive Python. Type '?' for help.
PyDev console: using IPython 7.3.0
Python 3.7.3 (default, Mar 27 2019, 09:23:15)
[Clang 10.0.1 (clang-1001.0.46.3)] on darwin
from timeit import repeat

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns.apriori import apriori
from mlxtend.frequent_patterns.fpgrowth import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Benchmark fpgrowth against apriori on one ticket file, then check that both
# algorithms report the same itemsets with the same support values.
CSV_PATH = '/Projects/drink_recommendation_engine/sample.csv'
BENCH_MIN_SUPPORT = 0.05  # threshold used for the timed calls
# NOTE: the equivalence check runs at a lower threshold than the timed calls,
# exactly as in the original session.
CHECK_MIN_SUPPORT = 0.01
NUMBER = 10  # calls per timing sample (timeit `number`)
REPEAT = 10  # timing samples collected (timeit `repeat`)

# Collect the item names of every ticket into one tuple per ticket_id and
# encode them; rows are reported as transactions and columns as unique items.
df = pd.read_csv(CSV_PATH, sep='\t')
te = TransactionEncoder()
_items = te.fit_transform(df.groupby(['ticket_id'])['item_name'].apply(lambda x: tuple(x)))
items = pd.DataFrame(_items, columns=te.columns_)

# Time both algorithms on the same input. The statement string is evaluated
# against this module's globals, so it can see `items` and BENCH_MIN_SUPPORT.
# Each entry of `runs` is the total time for NUMBER calls.
for algorithm in ("fpgrowth", "apriori"):
    print(f"{algorithm}: {items.shape[0]} transactions, {items.shape[1]} unique items")
    runs = repeat(
        f"{algorithm}(items, min_support=BENCH_MIN_SUPPORT)",
        globals=globals(),
        number=NUMBER,
        repeat=REPEAT,
    )
    print(f"fastest run: {min(runs)}, slowest run: {max(runs)}, mean: {np.mean(runs)}")

# Equivalence check: both algorithms must agree on itemsets and support.
df1 = fpgrowth(items, min_support=CHECK_MIN_SUPPORT)
df2 = apriori(items, min_support=CHECK_MIN_SUPPORT)
df3 = pd.merge(df1, df2, left_on=['itemsets'], right_on=['itemsets'])

# merge defaults to an inner join, which silently drops itemsets found by only
# one algorithm; comparing row counts catches that case. This presumes each
# result lists an itemset at most once.
if not len(df1) == len(df2) == len(df3):
    raise AssertionError(
        "itemsets differ: fpgrowth found {}, apriori found {}, {} in common".format(
            len(df1), len(df2), len(df3)
        )
    )

# Give both columns the same name so only the values are compared.
s1 = df3['support_x'].rename('support')
s2 = df3['support_y'].rename('support')
# assert_series_equal raises AssertionError on mismatch; reaching the print
# means the check passed.
pd.testing.assert_series_equal(s1, s2)
print("Assertion passed")
fpgrowth: 1069 transactions, 244 unique items
fastest run: 0.2882803219999994, slowest run: 0.31644036000000053, mean: 0.30230184899999984
apriori: 1069 transactions, 244 unique items
fastest run: 0.33422621099999983, slowest run: 0.35184302599999917, mean: 0.34135044549999993
Assertion passed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3.7.3 (default, Mar 27 2019, 09:23:15)
Type 'copyright', 'credits' or 'license' for more information
IPython 7.3.0 -- An enhanced Interactive Python. Type '?' for help.
PyDev console: using IPython 7.3.0
Python 3.7.3 (default, Mar 27 2019, 09:23:15)
[Clang 10.0.1 (clang-1001.0.46.3)] on darwin
from timeit import repeat

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns.apriori import apriori
from mlxtend.frequent_patterns.fpgrowth import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Benchmark fpgrowth against apriori on one ticket file, then check that both
# algorithms report the same itemsets with the same support values.
CSV_PATH = '/Projects/drink_recommendation_engine/sample_2.csv'
BENCH_MIN_SUPPORT = 0.01  # threshold used for the timed calls
CHECK_MIN_SUPPORT = 0.01  # threshold used for the equivalence check
NUMBER = 1  # calls per timing sample (timeit `number`)
REPEAT = 5  # timing samples collected (timeit `repeat`)

# Collect the item names of every ticket into one tuple per ticket_id and
# encode them; rows are reported as transactions and columns as unique items.
df = pd.read_csv(CSV_PATH, sep='\t')
te = TransactionEncoder()
_items = te.fit_transform(df.groupby(['ticket_id'])['item_name'].apply(lambda x: tuple(x)))
items = pd.DataFrame(_items, columns=te.columns_)

# Time both algorithms on the same input. The statement string is evaluated
# against this module's globals, so it can see `items` and BENCH_MIN_SUPPORT.
# Each entry of `runs` is the total time for NUMBER calls.
for algorithm in ("fpgrowth", "apriori"):
    print(f"{algorithm}: {items.shape[0]} transactions, {items.shape[1]} unique items")
    runs = repeat(
        f"{algorithm}(items, min_support=BENCH_MIN_SUPPORT)",
        globals=globals(),
        number=NUMBER,
        repeat=REPEAT,
    )
    print(f"fastest run: {min(runs)}, slowest run: {max(runs)}, mean: {np.mean(runs)}")

# Equivalence check: both algorithms must agree on itemsets and support.
df1 = fpgrowth(items, min_support=CHECK_MIN_SUPPORT)
df2 = apriori(items, min_support=CHECK_MIN_SUPPORT)
df3 = pd.merge(df1, df2, left_on=['itemsets'], right_on=['itemsets'])

# merge defaults to an inner join, which silently drops itemsets found by only
# one algorithm; comparing row counts catches that case. This presumes each
# result lists an itemset at most once.
if not len(df1) == len(df2) == len(df3):
    raise AssertionError(
        "itemsets differ: fpgrowth found {}, apriori found {}, {} in common".format(
            len(df1), len(df2), len(df3)
        )
    )

# Give both columns the same name so only the values are compared.
s1 = df3['support_x'].rename('support')
s2 = df3['support_y'].rename('support')
# assert_series_equal raises AssertionError on mismatch; reaching the print
# means the check passed.
pd.testing.assert_series_equal(s1, s2)
print("Assertion passed")
fpgrowth: 8676 transactions, 966 unique items
fastest run: 0.2531228709999995, slowest run: 0.2903997089999999, mean: 0.2693362885999999
apriori: 8676 transactions, 966 unique items
fastest run: 0.8125187799999996, slowest run: 0.9776996000000002, mean: 0.8516161937999998
Assertion passed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3.7.3 (default, Mar 27 2019, 09:23:15)
Type 'copyright', 'credits' or 'license' for more information
IPython 7.3.0 -- An enhanced Interactive Python. Type '?' for help.
PyDev console: using IPython 7.3.0
Python 3.7.3 (default, Mar 27 2019, 09:23:15)
[Clang 10.0.1 (clang-1001.0.46.3)] on darwin
from timeit import repeat

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns.apriori import apriori
from mlxtend.frequent_patterns.fpgrowth import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Benchmark fpgrowth against apriori on one ticket file, then check that both
# algorithms report the same itemsets with the same support values.
CSV_PATH = '/Projects/drink_recommendation_engine/sample.csv'
BENCH_MIN_SUPPORT = 0.01  # threshold used for the timed calls
CHECK_MIN_SUPPORT = 0.01  # threshold used for the equivalence check
NUMBER = 1  # calls per timing sample (timeit `number`)
REPEAT = 5  # timing samples collected (timeit `repeat`)

# Collect the item names of every ticket into one tuple per ticket_id and
# encode them; rows are reported as transactions and columns as unique items.
df = pd.read_csv(CSV_PATH, sep='\t')
te = TransactionEncoder()
_items = te.fit_transform(df.groupby(['ticket_id'])['item_name'].apply(lambda x: tuple(x)))
items = pd.DataFrame(_items, columns=te.columns_)

# Time both algorithms on the same input. The statement string is evaluated
# against this module's globals, so it can see `items` and BENCH_MIN_SUPPORT.
# Each entry of `runs` is the total time for NUMBER calls.
for algorithm in ("fpgrowth", "apriori"):
    print(f"{algorithm}: {items.shape[0]} transactions, {items.shape[1]} unique items")
    runs = repeat(
        f"{algorithm}(items, min_support=BENCH_MIN_SUPPORT)",
        globals=globals(),
        number=NUMBER,
        repeat=REPEAT,
    )
    print(f"fastest run: {min(runs)}, slowest run: {max(runs)}, mean: {np.mean(runs)}")

# Equivalence check: both algorithms must agree on itemsets and support.
df1 = fpgrowth(items, min_support=CHECK_MIN_SUPPORT)
df2 = apriori(items, min_support=CHECK_MIN_SUPPORT)
df3 = pd.merge(df1, df2, left_on=['itemsets'], right_on=['itemsets'])

# merge defaults to an inner join, which silently drops itemsets found by only
# one algorithm; comparing row counts catches that case. This presumes each
# result lists an itemset at most once.
if not len(df1) == len(df2) == len(df3):
    raise AssertionError(
        "itemsets differ: fpgrowth found {}, apriori found {}, {} in common".format(
            len(df1), len(df2), len(df3)
        )
    )

# Give both columns the same name so only the values are compared.
s1 = df3['support_x'].rename('support')
s2 = df3['support_y'].rename('support')
# assert_series_equal raises AssertionError on mismatch; reaching the print
# means the check passed.
pd.testing.assert_series_equal(s1, s2)
print("Assertion passed")
fpgrowth: 1069 transactions, 244 unique items
fastest run: 0.11113154099999978, slowest run: 0.11296855999999966, mean: 0.11202025359999972
apriori: 1069 transactions, 244 unique items
fastest run: 0.32792389399999955, slowest run: 0.39672938999999996, mean: 0.3459437119999995
Assertion passed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# With 0.05 min_support, new apriori is faster
from timeit import repeat

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns.apriori import apriori
from mlxtend.frequent_patterns.fpgrowth import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Benchmark fpgrowth against apriori on one ticket file, then check that both
# algorithms report the same itemsets with the same support values.
CSV_PATH = '/Projects/drink_recommendation_engine/sample.csv'
BENCH_MIN_SUPPORT = 0.05  # threshold used for the timed calls
# NOTE: the equivalence check runs at a lower threshold than the timed calls,
# exactly as in the original session.
CHECK_MIN_SUPPORT = 0.01
NUMBER = 10  # calls per timing sample (timeit `number`)
REPEAT = 10  # timing samples collected (timeit `repeat`)

# Collect the item names of every ticket into one tuple per ticket_id and
# encode them; rows are reported as transactions and columns as unique items.
df = pd.read_csv(CSV_PATH, sep='\t')
te = TransactionEncoder()
_items = te.fit_transform(df.groupby(['ticket_id'])['item_name'].apply(lambda x: tuple(x)))
items = pd.DataFrame(_items, columns=te.columns_)

# Time both algorithms on the same input. The statement string is evaluated
# against this module's globals, so it can see `items` and BENCH_MIN_SUPPORT.
# Each entry of `runs` is the total time for NUMBER calls.
for algorithm in ("fpgrowth", "apriori"):
    print(f"{algorithm}: {items.shape[0]} transactions, {items.shape[1]} unique items")
    runs = repeat(
        f"{algorithm}(items, min_support=BENCH_MIN_SUPPORT)",
        globals=globals(),
        number=NUMBER,
        repeat=REPEAT,
    )
    print(f"fastest run: {min(runs)}, slowest run: {max(runs)}, mean: {np.mean(runs)}")

# Equivalence check: both algorithms must agree on itemsets and support.
df1 = fpgrowth(items, min_support=CHECK_MIN_SUPPORT)
df2 = apriori(items, min_support=CHECK_MIN_SUPPORT)
df3 = pd.merge(df1, df2, left_on=['itemsets'], right_on=['itemsets'])

# merge defaults to an inner join, which silently drops itemsets found by only
# one algorithm; comparing row counts catches that case. This presumes each
# result lists an itemset at most once.
if not len(df1) == len(df2) == len(df3):
    raise AssertionError(
        "itemsets differ: fpgrowth found {}, apriori found {}, {} in common".format(
            len(df1), len(df2), len(df3)
        )
    )

# Give both columns the same name so only the values are compared.
s1 = df3['support_x'].rename('support')
s2 = df3['support_y'].rename('support')
# assert_series_equal raises AssertionError on mismatch; reaching the print
# means the check passed.
pd.testing.assert_series_equal(s1, s2)
print("Assertion passed")
fpgrowth: 1069 transactions, 244 unique items
fastest run: 0.2869345769999967, slowest run: 0.36350778599999956, mean: 0.3085288325999983
apriori: 1069 transactions, 244 unique items
fastest run: 0.10744446700000765, slowest run: 0.11501072999999451, mean: 0.10995658089999694
Assertion passed
# With 0.01 min_support, fpgrowth is much faster
from timeit import repeat

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns.apriori import apriori
from mlxtend.frequent_patterns.fpgrowth import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Benchmark fpgrowth against apriori on one ticket file, then check that both
# algorithms report the same itemsets with the same support values.
CSV_PATH = '/Projects/drink_recommendation_engine/sample.csv'
BENCH_MIN_SUPPORT = 0.01  # threshold used for the timed calls
CHECK_MIN_SUPPORT = 0.01  # threshold used for the equivalence check
NUMBER = 10  # calls per timing sample (timeit `number`)
REPEAT = 10  # timing samples collected (timeit `repeat`)

# Collect the item names of every ticket into one tuple per ticket_id and
# encode them; rows are reported as transactions and columns as unique items.
df = pd.read_csv(CSV_PATH, sep='\t')
te = TransactionEncoder()
_items = te.fit_transform(df.groupby(['ticket_id'])['item_name'].apply(lambda x: tuple(x)))
items = pd.DataFrame(_items, columns=te.columns_)

# Time both algorithms on the same input. The statement string is evaluated
# against this module's globals, so it can see `items` and BENCH_MIN_SUPPORT.
# Each entry of `runs` is the total time for NUMBER calls.
for algorithm in ("fpgrowth", "apriori"):
    print(f"{algorithm}: {items.shape[0]} transactions, {items.shape[1]} unique items")
    runs = repeat(
        f"{algorithm}(items, min_support=BENCH_MIN_SUPPORT)",
        globals=globals(),
        number=NUMBER,
        repeat=REPEAT,
    )
    print(f"fastest run: {min(runs)}, slowest run: {max(runs)}, mean: {np.mean(runs)}")

# Equivalence check: both algorithms must agree on itemsets and support.
df1 = fpgrowth(items, min_support=CHECK_MIN_SUPPORT)
df2 = apriori(items, min_support=CHECK_MIN_SUPPORT)
df3 = pd.merge(df1, df2, left_on=['itemsets'], right_on=['itemsets'])

# merge defaults to an inner join, which silently drops itemsets found by only
# one algorithm; comparing row counts catches that case. This presumes each
# result lists an itemset at most once.
if not len(df1) == len(df2) == len(df3):
    raise AssertionError(
        "itemsets differ: fpgrowth found {}, apriori found {}, {} in common".format(
            len(df1), len(df2), len(df3)
        )
    )

# Give both columns the same name so only the values are compared.
s1 = df3['support_x'].rename('support')
s2 = df3['support_y'].rename('support')
# assert_series_equal raises AssertionError on mismatch; reaching the print
# means the check passed.
pd.testing.assert_series_equal(s1, s2)
print("Assertion passed")
fpgrowth: 1069 transactions, 244 unique items
fastest run: 1.1011493270000017, slowest run: 1.1626929589999975, mean: 1.1198546620000003
apriori: 1069 transactions, 244 unique items
fastest run: 3.4599831980000033, slowest run: 3.8697552900000005, mean: 3.575044468499999
Assertion passed
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment