Skip to content

Instantly share code, notes, and snippets.

@MJ111
Created March 3, 2019 06:30
Show Gist options
  • Save MJ111/b8e4ac0bea749cbe8b19c64e239f5fc1 to your computer and use it in GitHub Desktop.
Save MJ111/b8e4ac0bea749cbe8b19c64e239f5fc1 to your computer and use it in GitHub Desktop.
[pattern discovery - apriori] Feedback will be appreciated.
import itertools
# An itemset is frequent when its support is strictly greater than this value.
abs_min_support = 771
# freq_items[k] maps every frequent k-itemset (its items joined with ';') to
# its support count; index 0 is a placeholder so the index equals the size.
freq_items = [None, {}]


def count_item_support(lines):
    """Count, for every item, the number of transactions that contain it.

    Args:
        lines: Iterable of strings, one transaction per string, with the
            items separated by ';'. Whitespace around an item is ignored.

    Returns:
        A dict mapping each item to its support, in first-seen order.
    """
    support = {}
    for line in lines:
        # dict.fromkeys de-duplicates while keeping first-seen order, so an
        # item repeated inside one transaction is only counted once.
        for item in dict.fromkeys(token.strip() for token in line.split(';')):
            if item:  # blank lines and stray ';' yield empty tokens
                support[item] = support.get(item, 0) + 1
    return support


def select_frequent(support, min_support):
    """Return the entries of `support` whose count is strictly above `min_support`."""
    return {item: count for item, count in support.items() if count > min_support}


# Only touch the file system when executed as a script, so the helpers above
# can be imported without side effects.
if __name__ == "__main__":
    # part1
    with open('categories.txt', 'r') as f:
        counting = count_item_support(f)
    freq_items[1].update(select_frequent(counting, abs_min_support))
    with open('patterns.txt', 'w') as f:
        for k, v in freq_items[1].items():
            f.write(f'{v}:{k}\n')
# part2
def read_transactions(lines):
    """Parse ';'-separated lines into transactions.

    Args:
        lines: Iterable of strings, one transaction per string.

    Returns:
        A list with one entry per input line; each entry is the list of
        distinct, whitespace-stripped, non-empty items of that line in
        first-seen order (a blank line yields an empty transaction).
    """
    transactions = []
    for line in lines:
        unique = dict.fromkeys(token.strip() for token in line.split(';'))
        transactions.append([item for item in unique if item])
    return transactions


def build_item_index(transactions):
    """Map each item to the set of indices of the transactions containing it."""
    index = {}
    for row, items in enumerate(transactions):
        for item in items:
            index.setdefault(item, set()).add(row)
    return index


def generate_candidates(prev_itemsets, size):
    """Build the candidate itemsets of `size` items from the previous level.

    Two frequent (size - 1)-itemsets are joined when their union has exactly
    `size` items; the union is kept only if every one of its
    (size - 1)-subsets is itself in `prev_itemsets`, since an itemset with an
    infrequent subset cannot be frequent.

    Args:
        prev_itemsets: Iterable of frozensets, the frequent itemsets of the
            previous level.
        size: Number of items each candidate must have.

    Returns:
        A list of distinct frozensets in first-generated order.
    """
    prev = list(prev_itemsets)
    known = set(prev)
    seen = set()
    candidates = []
    for left, right in itertools.combinations(prev, 2):
        union = left | right
        if len(union) != size or union in seen:
            continue
        seen.add(union)  # remember pruned unions too, to avoid re-checking
        if all(union - {item} in known for item in union):
            candidates.append(union)
    return candidates


def mine_frequent_itemsets(transactions, frequent_singles, min_support):
    """Find all frequent itemsets with two or more items, level by level.

    Args:
        transactions: List of item lists, e.g. from `read_transactions`.
        frequent_singles: Iterable of the item names that are frequent on
            their own; only these are combined into larger itemsets.
        min_support: An itemset is kept when the number of transactions
            containing all of its items is strictly greater than this value.

    Returns:
        A list of dicts, one per itemset size starting at 2. Each dict maps
        the itemset (its items sorted and joined with ';') to its support.
        The list ends at the last size that still has a frequent itemset.
    """
    index = build_item_index(transactions)
    # No itemset can be larger than the longest transaction.
    max_size = max((len(set(items)) for items in transactions), default=0)
    levels = []
    prev = [frozenset([item]) for item in frequent_singles]
    for size in range(2, max_size + 1):
        level = {}
        survivors = []
        for candidate in generate_candidates(prev, size):
            # Transactions containing every item = intersection of the
            # per-item index sets; an unknown item contributes no rows.
            rows = set.intersection(
                *(index.get(item, set()) for item in candidate)
            )
            if len(rows) > min_support:
                level[';'.join(sorted(candidate))] = len(rows)
                survivors.append(candidate)
        if not level:
            break  # larger itemsets need frequent subsets; none are left
        levels.append(level)
        prev = survivors
    return levels


# Only touch the file system when executed as a script, so the helpers above
# can be imported without side effects.
if __name__ == "__main__":
    with open('categories.txt', 'r') as f:
        tdb = read_transactions(f)
    # freq_items[1] (frequent single items) and abs_min_support come from part1.
    freq_items.extend(
        mine_frequent_itemsets(tdb, freq_items[1], abs_min_support)
    )
    with open('patterns2.txt', 'w') as f:
        for freq in freq_items[1:]:
            for k, v in freq.items():
                f.write(f'{v}:{k}\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment