Created
December 4, 2017 19:59
-
-
Save duarteocarmo/c9cea611577ea2d33b5f2cb0614b70c6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from subprocess import run | |
import re | |
import os | |
import time | |
from sys import platform | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from Project_Clean_data import raw, header, standardize_this | |
from writeapriorifile import * | |
# Binarize the imported data and keep a plain-text dump of the result in
# test_file.txt. WriteAprioriFile is defined in writeapriorifile; presumably
# it produces the AprioriFile.txt that is mined further down (confirm there).
X = binarize(raw)
np.savetxt("test_file.txt", X)
WriteAprioriFile(X)
# Choose the suffix of the apriori executable (`ext`) and the path separator
# (`dir_sep`) for the current operating system. Both are later interpolated
# into the command line that launches ../Tools/apriori<ext>.
for _os_prefix, _exe_suffix, _separator in (
    ('linux', '', '/'),        # Linux
    ('darwin', 'MAC', '/'),    # OS X
    ('win', '.exe', '\\'),     # Windows
):
    if platform.startswith(_os_prefix):
        ext, dir_sep = _exe_suffix, _separator
        break
else:
    # No known prefix matched: there is no apriori binary for this platform.
    raise NotImplementedError()
# Input file and thresholds handed to the external apriori executable.
filename = "AprioriFile.txt"
minSup = 30   # forwarded as -s (presumably minimum support in percent; see the apriori docs)
minConf = 60  # forwarded as -c; rule mining is skipped entirely when this is <= 0
maxRule = 4   # forwarded as -n

# Run Apriori Algorithm
# First pass: frequent itemsets, written to apriori_temp1.txt. The -v format
# labels every output line with "[Sup. ...]".
print('Mining for frequent itemsets by the Apriori algorithm')
status1 = run('..{0}Tools{0}apriori{1} -f"," -s{2} -v"[Sup. %S]" {3} apriori_temp1.txt'
              .format(dir_sep, ext, minSup, filename), shell=True)
if status1.returncode != 0:
    print('An error occurred while calling apriori, a likely cause is that minSup was set to high such that no '
          'frequent itemsets were generated or spaces are included in the path to the apriori files.')
    # Stop with a non-zero status. The bare exit() helper is only injected by
    # the site module for interactive use and reported success (status 0).
    raise SystemExit(1)

if minConf > 0:
    # Second pass (-tr): association rules, written to apriori_temp2.txt and
    # labelled "[Conf. ...,Sup. ...]".
    print('Mining for associations by the Apriori algorithm')
    status2 = run('..{0}Tools{0}apriori{1} -tr -f"," -n{2} -c{3} -s{4} -v"[Conf. %C,Sup. %S]" {5} apriori_temp2.txt'
                  .format(dir_sep, ext, maxRule, minConf, minSup, filename), shell=True)
    if status2.returncode != 0:
        print('An error occurred while calling apriori')
        raise SystemExit(1)
print('Apriori analysis done, extracting results')
# Extract information from stored files apriori_temp1.txt and apriori_temp2.txt


def _annotation_value(pattern, row):
    """Return the number captured by group 1 of *pattern* in an apriori output row.

    Raises:
        ValueError: if *row* does not carry the expected annotation.
    """
    found = pattern.search(row)
    if found is None:
        raise ValueError('Unexpected apriori output line: {0!r}'.format(row))
    return float(found.group(1))


# apriori is launched with -v"[Sup. %S]" and -v"[Conf. %C,Sup. %S]", so each
# output line is expected to end with that bracketed annotation. Capturing the
# value between the label and its delimiter keeps every digit; the previous
# pattern sliced the first and last character off whatever matched, which
# turned e.g. "85," into "5".
_SUPPORT_RE = re.compile(r'\[Sup\. ([^\],]+)\]\s*$')
_CONFIDENCE_RE = re.compile(r'\[Conf\. ([^\],]+),Sup\. [^\],]+\]\s*$')

# Extract Frequent Itemsets
with open('apriori_temp1.txt', 'r') as itemset_file:
    # rstrip('\n') instead of [0:-1]: a final line without a newline keeps its last character.
    FrequentItemsets = [row.rstrip('\n') for row in itemset_file]
# Column vector (n, 1) holding the support of each itemset.
sup = np.array([_annotation_value(_SUPPORT_RE, row) for row in FrequentItemsets],
               dtype=float).reshape(-1, 1)
os.remove('apriori_temp1.txt')

# Extract Association rules
with open('apriori_temp2.txt', 'r') as rule_file:
    AssocRules = [row.rstrip('\n') for row in rule_file]
# Column vector (n, 1) holding the confidence of each rule.
conf = np.array([_annotation_value(_CONFIDENCE_RE, row) for row in AssocRules],
                dtype=float).reshape(-1, 1)
os.remove('apriori_temp2.txt')

# sort (FrequentItemsets by support value, AssocRules by confidence value),
# largest value first: argsort is ascending, so walk its result backwards.
AssocRulesSorted = [AssocRules[k] for k in np.argsort(conf, axis=0).ravel()[::-1]]
FrequentItemsetsSorted = [FrequentItemsets[k] for k in np.argsort(sup, axis=0).ravel()[::-1]]
# Print the results
# NOTE(review): the reason for the half-second pause is not evident from this
# file; possibly it lets the apriori subprocess output settle first - confirm.
time.sleep(.5)
print('\n')
print('RESULTS:\n')

# Itemsets are listed in descending order of support.
print('Frequent itemsets:')
for itemset_text in FrequentItemsetsSorted:
    print('Item: {0}'.format(itemset_text))
print('\n')

# Rules are listed in descending order of confidence.
print('Association rules:')
for rule_text in AssocRulesSorted:
    print('Rule: {0}'.format(rule_text))
sns.set()

# Bar chart of the support of every frequent itemset (highest support first),
# saved to Bar_items.png.
pos = list(range(len(FrequentItemsetsSorted)))
width = 0.3
# NOTE(review): these labels are hard-coded, so they only line up with the bars
# when the mining step returns exactly these five itemsets in this order -
# confirm after every re-run with different data or thresholds.
ticks_list = ['First intercourse','Hormonal Contraceptives','Age','Num pregnacies','Num of partners']

# Read the support from the trailing "[Sup. x]" annotation. The previous fixed
# slice (the two characters before the closing bracket) silently produced
# wrong numbers for three-digit values and failed on decimals.
sup_list = []
for itemset_text in FrequentItemsetsSorted:
    found = re.search(r'Sup\. ([^\],]+)\]?\s*$', itemset_text)
    if found is None:
        raise ValueError('No support value found in itemset: {0!r}'.format(itemset_text))
    sup_list.append(float(found.group(1)))

fig, ax = plt.subplots(figsize=(10,5))
plt.bar(pos, sup_list, width, alpha=0.5, color='g')
ax.set_ylabel('Support %')
ax.set_title('Frequent itemsets')
ax.set_xticks([p + width/2 for p in pos])
ax.set_xticklabels(ticks_list)
plt.xlim(min(pos)-width, max(pos)+width*2)
plt.ylim([0, 100])
plt.legend(['Percentage'], loc='upper left')
filename = "Bar_items.png"
plt.savefig(filename,dpi=600)
plt.close()
# Grouped bar chart showing support (green) and confidence (blue) of every
# association rule, highest confidence first; saved to Assoc_rules.png and
# then shown on screen.
pos = list(range(len(AssocRulesSorted)))
width = 0.2

# Read both values from the trailing "[Conf. x,Sup. y]" annotation. The
# previous hard-coded offsets ([13:15] and [21:23]) were only valid for
# single-character item ids and two-digit integer percentages.
sup_list2 = []
conf_list = []
for rule_text in AssocRulesSorted:
    found = re.search(r'\[Conf\. ([^\],]+),Sup\. ([^\],]+)\]?\s*$', rule_text)
    if found is None:
        raise ValueError('No confidence/support found in rule: {0!r}'.format(rule_text))
    conf_list.append(float(found.group(1)))
    sup_list2.append(float(found.group(2)))

# NOTE(review): these labels are hard-coded, so they only match the bars when
# the mining step returns exactly these two rules in this order - confirm
# after every re-run with different data or thresholds.
ticks_list2 = ['First intercourse->Age','Num pregnacies->Age']

fig, ax = plt.subplots(figsize=(8,5))
plt.bar(pos, sup_list2, width, alpha=0.5, color='g')
# Second series is shifted by one bar width so the two bars sit side by side.
plt.bar([p + width for p in pos], conf_list, width, alpha=0.5, color='b')
ax.set_ylabel('Percentage')
ax.set_title('Association rules')
ax.set_xticks([p + 1.5 * width for p in pos])
ax.set_xticklabels(ticks_list2)
plt.xlim(min(pos)-width, max(pos)+width*4)
plt.ylim([0, 100])
plt.legend(['Support', 'Confidence'], loc='upper left')
filename = "Assoc_rules.png"
plt.savefig(filename,dpi=600)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment