Created
December 12, 2013 02:46
-
-
Save puredevotion/7922480 to your computer and use it in GitHub Desktop.
Complete code for issue with int is not subscriptable issue
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"aesthetic": { | |
"feeling": { | |
"the": { | |
"feeling": { | |
"of": 10 | |
} | |
} | |
}, | |
"perception": { | |
"of": { | |
"the": { | |
"world": 20, | |
"work": 30 | |
}, | |
"their": { | |
"female": 40 | |
} | |
}, | |
"and": { | |
"the": { | |
"works": 50, | |
"work": 5, | |
"environment": 5, | |
"design": 15, | |
"painting": 15, | |
"music": 100 | |
} | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Created on 17 apr. 2013 | |
@author: puredevotion | |
''' | |
import collections | |
import sys | |
import json | |
import linecache | |
from pprint import pprint | |
from datetime import datetime | |
global output_file | |
def init(rNgram_file="ag.txt", rOutput_file="ag.json"): | |
''' | |
Constructor, creates new file, creates freq_list et al. | |
''' | |
global output_file | |
output_file = rOutput_file | |
try: | |
with open(rNgram_file, 'r', encoding='utf-8') as ngram_file: | |
data = json.load(ngram_file) | |
print(datetime.now()) | |
print("reading " + rNgram_file) | |
data = rank_items(data) | |
data = probability_items(data) | |
write_to_file(data) | |
ngram_file.close() | |
except IOError: | |
sys.exit("cannot open {0}".format(rNgram_file)) | |
def rank_items(data): | |
''' | |
Rank all ngrams found in a file | |
Each file has a tree (in JSON) with 0-4grams. | |
These are all ranked, and then a JSON file is Created | |
Ranking is counting the occurences of an underlying group of ngrams | |
''' | |
ngram_rank = one_gram_rank = two_gram_rank = three_gram_rank = 0 | |
try: | |
for ngram, one_grams in data.items(): | |
ngram_rank = 0 | |
for one_gram, two_grams in one_grams.items(): | |
one_gram_rank = 0 | |
for two_gram, three_grams in two_grams.items(): | |
two_gram_rank = 0 | |
for three_gram, four_grams in three_grams.items(): | |
three_gram_rank = 0 | |
if isinstance(four_grams, collections.Mapping): | |
for four_gram, values in four_grams.items(): | |
three_gram_rank += values | |
else: | |
print("----------------NO DICT----------------") | |
four_grams['_rank'] = int(three_gram_rank) | |
two_gram_rank += three_gram_rank | |
three_grams['_rank'] = int(two_gram_rank) | |
one_gram_rank += two_gram_rank | |
two_grams['_rank'] = int(one_gram_rank) | |
ngram_rank += one_gram_rank | |
one_grams['_rank'] = int(ngram_rank) | |
except IndexError as e: | |
print("index bestaat niet: ", e) | |
pass | |
except AttributeError as e: | |
print("attr error: ", e) | |
pass | |
except EOFError as e: | |
print("eof: ", e) | |
pass | |
except: | |
print("Unexpected error:", sys.exc_info()[0]) | |
pass | |
pprint(data) | |
return data | |
def probability_items(data): | |
''' | |
calculate the probability of al the ranks | |
''' | |
pprint(data) | |
try: | |
for ngram, one_grams in data.items(): | |
ngram_rank = int(one_grams['_rank']) | |
print("NgramRank: ", str(ngram_rank)) | |
if ngram != '_rank': | |
for one_gram, two_grams in one_grams.items(): | |
pprint(type(two_grams['_rank'])) | |
one_gram_rank = str(two_grams['_rank']) | |
if one_gram != '_rank': | |
for two_gram, three_grams in two_grams.items(): | |
pprint(type(three_grams['_rank'])) | |
pprint(str(three_grams['_rank'])) | |
two_gram_rank = str(three_grams['_rank']) | |
if two_gram != '_rank': | |
for three_gram, four_grams in three_grams.items(): | |
pprint(type(four_grams['_rank'])) | |
pprint(str(four_grams['_rank'])) | |
three_gram_rank = str(four_grams['_rank']) | |
if three_gram != '_rank': | |
if isinstance(four_grams, collections.Mapping): | |
for four_gram, values in four_grams.items(): | |
if four_gram != '_rank': | |
print("4gram "+four_gram+": ", str(values)) | |
four_gram_prob = int(values) / int(three_gram_rank) | |
print("three_rank: "+str(three_gram_rank)+" prob: ", str(four_gram_prob)) | |
four_grams[four_gram] = {'_rank': values, '_prob': four_gram_prob} | |
else: | |
print("----------------NO DICT----------------") | |
three_gram_prob = int(three_gram_rank) / int(two_gram_rank) | |
four_grams['_prob'] = three_gram_prob | |
two_gram_prob = int(two_gram_rank) / int(one_gram_rank) | |
three_grams['_prob'] = two_gram_prob | |
one_gram_prob = int(one_gram_rank) / int(ngram_rank) | |
two_grams['_prob'] = one_gram_prob | |
ngram_prob = int(ngram_rank) / int(ngram_rank) | |
one_grams['_prob'] = ngram_prob | |
except IndexError: | |
PrintException() | |
pass | |
except AttributeError: | |
PrintException() | |
pass | |
except EOFError: | |
PrintException() | |
pass | |
except TypeError: | |
PrintException() | |
pass | |
except: | |
PrintException() | |
pass | |
return data | |
def write_to_file(data): | |
global output_file | |
try: | |
with open(output_file, 'w', encoding='utf8') as output_file: | |
json.dump(data, output_file, indent=4) | |
print("Succesfully wrote all ranks to output file!") | |
except IOError as err: | |
sys.exit("I/O error: {0}".format(err)) | |
def PrintException(): | |
exc_type, exc_obj, tb = sys.exc_info() | |
f = tb.tb_frame | |
lineno = tb.tb_lineno | |
filename = f.f_code.co_filename | |
linecache.checkcache(filename) | |
line = linecache.getline(filename, lineno, f.f_globals) | |
print('Exception in on line {}: {}: {}'.format(lineno, line.strip(), exc_obj)) | |
init(rNgram_file="ae.json", rOutput_file="json.json") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"aesthetic": { | |
"perception": { | |
"of": { | |
"the": { | |
"_prob": 0.5555555555555556, | |
"_rank": 50, | |
"work": { | |
"_prob": 0.6, | |
"_rank": 30 | |
}, | |
"world": { | |
"_prob": 0.4, | |
"_rank": 20 | |
} | |
}, | |
"their": { | |
"_prob": 0.4444444444444444, | |
"_rank": 40, | |
"female": { | |
"_prob": 1.0, | |
"_rank": 40 | |
} | |
}, | |
"_rank": 90 | |
}, | |
"_rank": 280, | |
"and": { | |
"the": { | |
"works": 50, | |
"_rank": 190, | |
"design": 15, | |
"work": 5, | |
"environment": 5, | |
"music": 100, | |
"painting": 15 | |
}, | |
"_rank": 190 | |
} | |
}, | |
"_rank": 290, | |
"feeling": { | |
"the": { | |
"_rank": 10, | |
"feeling": { | |
"of": 10, | |
"_rank": 10 | |
} | |
}, | |
"_rank": 10 | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment