puredevotion/ae.json

## ae.json
{
    "aesthetic": {
        "feeling": {
            "the": {
                "feeling": {
                    "of": 10
                }
            }
        },
        "perception": {
            "of": {
                "the": {
                    "world": 20,
                    "work": 30
                },
                "their": {
                    "female": 40
                }
            },
            "and": {
                "the": {
                    "works": 50,
                    "work": 5,
                    "environment": 5,
                    "design": 15,
                    "painting": 15,
                    "music": 100
                }
            }
        }
    }
}

## main.py
'''
Created on 17 apr. 2013

@author: puredevotion
'''
import collections
import sys
import json
import linecache
from pprint import pprint
from datetime import datetime

# Path of the JSON file that write_to_file() writes to; init() assigns it.
# A bare module-level ``global`` statement is a no-op and defined nothing,
# so the name is given an explicit initial value instead.
output_file = None


def init(rNgram_file="ag.txt", rOutput_file="ag.json"):
    '''
    Run the whole pipeline: load an ngram tree, rank it, add probabilities
    and write the result to disk.

    rNgram_file  -- path of the JSON file holding the nested ngram tree
    rOutput_file -- path the annotated tree is written to; stored in the
                    module-level ``output_file`` that write_to_file() reads

    Exits through sys.exit("cannot open ...") when the input file cannot be
    opened or read.
    '''
    global output_file
    output_file = rOutput_file

    # Only opening/reading the input may be reported as "cannot open".
    # The processing steps stay outside the try so that an I/O error raised
    # while ranking or printing is not mislabelled as an input-file problem.
    # The ``with`` block closes the file; no explicit close() is needed.
    try:
        with open(rNgram_file, 'r', encoding='utf-8') as ngram_file:
            data = json.load(ngram_file)
    except IOError:
        sys.exit("cannot open {0}".format(rNgram_file))

    print(datetime.now())
    print("reading " + rNgram_file)
    data = rank_items(data)
    data = probability_items(data)
    write_to_file(data)


def rank_items(data):
    '''
    Rank all ngram groups of a nested ngram tree, in place.

    ``data`` is the tree loaded from the JSON input: nested mappings whose
    innermost values are occurrence counts. Every mapping below the top
    level receives a ``'_rank'`` entry holding the (int-truncated) sum of
    all counts underneath it. The top-level mapping itself gets no
    ``'_rank'``. Trees of any depth are handled.

    Existing ``'_rank'`` / ``'_prob'`` entries are ignored while summing, so
    they are never counted as occurrences.

    Returns the same ``data`` object. Ranking is best effort: an error is
    printed and the (possibly partially ranked) tree is still returned.
    '''
    # collections.Mapping was removed in Python 3.10; the ABC lives in
    # collections.abc. Imported locally so the block does not depend on the
    # file's import section.
    from collections.abc import Mapping

    meta_keys = ('_rank', '_prob')

    def rank_subtree(node):
        # A non-mapping value is a leaf: its count is its own rank.
        if not isinstance(node, Mapping):
            return node
        total = sum(
            rank_subtree(child)
            for key, child in node.items()
            if key not in meta_keys
        )
        # Written only after the iteration above has finished, so the dict
        # is never resized while it is being walked.
        node['_rank'] = int(total)
        return total

    try:
        for one_grams in data.values():
            rank_subtree(one_grams)
    except IndexError as e:
        print("index bestaat niet: ", e)
    except AttributeError as e:
        print("attr error: ", e)
    except EOFError as e:
        print("eof: ", e)
    except Exception:
        # Still best effort, but no longer traps KeyboardInterrupt/SystemExit.
        print("Unexpected error:", sys.exc_info()[0])

    pprint(data)
    return data


def probability_items(data):
    '''
    Calculate the probability of all the ranks, in place.

    Expects a tree in which every mapping below the top level carries a
    ``'_rank'`` entry (as written by rank_items). For each such mapping a
    ``'_prob'`` entry is added: its own rank divided by the rank of its
    parent. Top-level groups have no parent and are measured against their
    own rank, i.e. 1.0 for a non-empty group. Every leaf count ``n`` is
    replaced by ``{'_rank': n, '_prob': n / parent_rank}``.

    ``'_rank'`` / ``'_prob'`` entries are skipped while walking the tree;
    reading ``child['_rank']`` on those integer entries is what made the
    earlier implementation abort halfway through.

    Returns the same ``data`` object. The work is best effort: on an error
    PrintException() reports it and the partially annotated tree is returned.
    '''
    # collections.Mapping was removed in Python 3.10; use the ABC from
    # collections.abc (local import keeps the block self-contained).
    from collections.abc import Mapping

    meta_keys = ('_rank', '_prob')

    def ratio(part, whole):
        # An empty group has rank 0; report 0.0 instead of dividing by zero.
        whole = int(whole)
        return int(part) / whole if whole else 0.0

    def annotate(node):
        node_rank = node['_rank']
        # Snapshot the keys: leaf values are replaced while walking.
        for key in list(node):
            if key in meta_keys:
                continue
            child = node[key]
            if isinstance(child, Mapping):
                child['_prob'] = ratio(child['_rank'], node_rank)
                annotate(child)
            else:
                node[key] = {'_rank': child, '_prob': ratio(child, node_rank)}

    pprint(data)

    try:
        for ngram, one_grams in data.items():
            if ngram in meta_keys or not isinstance(one_grams, Mapping):
                continue
            one_grams['_prob'] = ratio(one_grams['_rank'], one_grams['_rank'])
            annotate(one_grams)
    except Exception:
        # Still best effort, but no longer traps KeyboardInterrupt/SystemExit.
        PrintException()

    return data


def write_to_file(data):
    '''
    Write ``data`` as indented JSON to the path stored in the module-level
    ``output_file`` (assigned by init).

    Exits through sys.exit("I/O error: ...") when the file cannot be opened
    or written.
    '''
    # The open handle gets its own local name. Binding it to ``output_file``
    # would replace the global path with a closed file object and break
    # every later call.
    try:
        with open(output_file, 'w', encoding='utf8') as handle:
            json.dump(data, handle, indent=4)
        print("Succesfully wrote all ranks to output file!")
    except IOError as err:
        sys.exit("I/O error: {0}".format(err))


def PrintException():
    '''
    Print line number, source line and message of the exception that is
    currently being handled.

    Intended to be called from inside an ``except`` block. The line reported
    is the one recorded in the first entry of the traceback returned by
    sys.exc_info(). When no exception is active a short notice is printed
    instead of failing on the missing traceback.
    '''
    _, exc_obj, tb = sys.exc_info()
    if tb is None:
        # Called outside an ``except`` block: there is nothing to look up.
        print('No active exception to report')
        return

    frame = tb.tb_frame
    lineno = tb.tb_lineno
    filename = frame.f_code.co_filename
    # Refresh the cache so a source file edited since import is re-read.
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, frame.f_globals)
    print('Exception in on line {}: {}: {}'.format(lineno, line.strip(), exc_obj))

# Script entry: rank the ngram tree in ae.json and write it to json.json.
init(
    rNgram_file="ae.json",
    rOutput_file="json.json",
)

## output.json
{
    "aesthetic": {
        "perception": {
            "of": {
                "the": {
                    "_prob": 0.5555555555555556,
                    "_rank": 50,
                    "work": {
                        "_prob": 0.6,
                        "_rank": 30
                    },
                    "world": {
                        "_prob": 0.4,
                        "_rank": 20
                    }
                },
                "their": {
                    "_prob": 0.4444444444444444,
                    "_rank": 40,
                    "female": {
                        "_prob": 1.0,
                        "_rank": 40
                    }
                },
                "_rank": 90
            },
            "_rank": 280,
            "and": {
                "the": {
                    "works": 50,
                    "_rank": 190,
                    "design": 15,
                    "work": 5,
                    "environment": 5,
                    "music": 100,
                    "painting": 15
                },
                "_rank": 190
            }
        },
        "_rank": 290,
        "feeling": {
            "the": {
                "_rank": 10,
                "feeling": {
                    "of": 10,
                    "_rank": 10
                }
            },
            "_rank": 10
        }
    }
}
	{
	"aesthetic": {
	"feeling": {
	"the": {
	"feeling": {
	"of": 10
	}
	}
	},
	"perception": {
	"of": {
	"the": {
	"world": 20,
	"work": 30
	},
	"their": {
	"female": 40
	}
	},
	"and": {
	"the": {
	"works": 50,
	"work": 5,
	"environment": 5,
	"design": 15,
	"painting": 15,
	"music": 100
	}
	}
	}
	}
	}
	'''
	Created on 17 apr. 2013

	@author: puredevotion
	'''
	import collections
	import sys
	import json
	import linecache
	from pprint import pprint
	from datetime import datetime

	# Path of the JSON file that write_to_file() writes to; init() assigns it.
	# A bare module-level ``global`` statement is a no-op and defined nothing,
	# so the name is given an explicit initial value instead.
	output_file = None


	def init(rNgram_file="ag.txt", rOutput_file="ag.json"):
	    '''
	    Run the whole pipeline: load an ngram tree, rank it, add probabilities
	    and write the result to disk.

	    rNgram_file  -- path of the JSON file holding the nested ngram tree
	    rOutput_file -- path the annotated tree is written to; stored in the
	                    module-level ``output_file`` that write_to_file() reads

	    Exits through sys.exit("cannot open ...") when the input file cannot be
	    opened or read.
	    '''
	    global output_file
	    output_file = rOutput_file

	    # Only opening/reading the input may be reported as "cannot open".
	    # The processing steps stay outside the try so that an I/O error raised
	    # while ranking or printing is not mislabelled as an input-file problem.
	    # The ``with`` block closes the file; no explicit close() is needed.
	    try:
	        with open(rNgram_file, 'r', encoding='utf-8') as ngram_file:
	            data = json.load(ngram_file)
	    except IOError:
	        sys.exit("cannot open {0}".format(rNgram_file))

	    print(datetime.now())
	    print("reading " + rNgram_file)
	    data = rank_items(data)
	    data = probability_items(data)
	    write_to_file(data)


	def rank_items(data):
	    '''
	    Rank all ngram groups of a nested ngram tree, in place.

	    ``data`` is the tree loaded from the JSON input: nested mappings whose
	    innermost values are occurrence counts. Every mapping below the top
	    level receives a ``'_rank'`` entry holding the (int-truncated) sum of
	    all counts underneath it. The top-level mapping itself gets no
	    ``'_rank'``. Trees of any depth are handled.

	    Existing ``'_rank'`` / ``'_prob'`` entries are ignored while summing, so
	    they are never counted as occurrences.

	    Returns the same ``data`` object. Ranking is best effort: an error is
	    printed and the (possibly partially ranked) tree is still returned.
	    '''
	    # collections.Mapping was removed in Python 3.10; the ABC lives in
	    # collections.abc. Imported locally so the block does not depend on the
	    # file's import section.
	    from collections.abc import Mapping

	    meta_keys = ('_rank', '_prob')

	    def rank_subtree(node):
	        # A non-mapping value is a leaf: its count is its own rank.
	        if not isinstance(node, Mapping):
	            return node
	        total = sum(
	            rank_subtree(child)
	            for key, child in node.items()
	            if key not in meta_keys
	        )
	        # Written only after the iteration above has finished, so the dict
	        # is never resized while it is being walked.
	        node['_rank'] = int(total)
	        return total

	    try:
	        for one_grams in data.values():
	            rank_subtree(one_grams)
	    except IndexError as e:
	        print("index bestaat niet: ", e)
	    except AttributeError as e:
	        print("attr error: ", e)
	    except EOFError as e:
	        print("eof: ", e)
	    except Exception:
	        # Still best effort, but no longer traps KeyboardInterrupt/SystemExit.
	        print("Unexpected error:", sys.exc_info()[0])

	    pprint(data)
	    return data


	def probability_items(data):
	    '''
	    Calculate the probability of all the ranks, in place.

	    Expects a tree in which every mapping below the top level carries a
	    ``'_rank'`` entry (as written by rank_items). For each such mapping a
	    ``'_prob'`` entry is added: its own rank divided by the rank of its
	    parent. Top-level groups have no parent and are measured against their
	    own rank, i.e. 1.0 for a non-empty group. Every leaf count ``n`` is
	    replaced by ``{'_rank': n, '_prob': n / parent_rank}``.

	    ``'_rank'`` / ``'_prob'`` entries are skipped while walking the tree;
	    reading ``child['_rank']`` on those integer entries is what made the
	    earlier implementation abort halfway through.

	    Returns the same ``data`` object. The work is best effort: on an error
	    PrintException() reports it and the partially annotated tree is returned.
	    '''
	    # collections.Mapping was removed in Python 3.10; use the ABC from
	    # collections.abc (local import keeps the block self-contained).
	    from collections.abc import Mapping

	    meta_keys = ('_rank', '_prob')

	    def ratio(part, whole):
	        # An empty group has rank 0; report 0.0 instead of dividing by zero.
	        whole = int(whole)
	        return int(part) / whole if whole else 0.0

	    def annotate(node):
	        node_rank = node['_rank']
	        # Snapshot the keys: leaf values are replaced while walking.
	        for key in list(node):
	            if key in meta_keys:
	                continue
	            child = node[key]
	            if isinstance(child, Mapping):
	                child['_prob'] = ratio(child['_rank'], node_rank)
	                annotate(child)
	            else:
	                node[key] = {'_rank': child, '_prob': ratio(child, node_rank)}

	    pprint(data)

	    try:
	        for ngram, one_grams in data.items():
	            if ngram in meta_keys or not isinstance(one_grams, Mapping):
	                continue
	            one_grams['_prob'] = ratio(one_grams['_rank'], one_grams['_rank'])
	            annotate(one_grams)
	    except Exception:
	        # Still best effort, but no longer traps KeyboardInterrupt/SystemExit.
	        PrintException()

	    return data


	def write_to_file(data):
	    '''
	    Write ``data`` as indented JSON to the path stored in the module-level
	    ``output_file`` (assigned by init).

	    Exits through sys.exit("I/O error: ...") when the file cannot be opened
	    or written.
	    '''
	    # The open handle gets its own local name. Binding it to ``output_file``
	    # would replace the global path with a closed file object and break
	    # every later call.
	    try:
	        with open(output_file, 'w', encoding='utf8') as handle:
	            json.dump(data, handle, indent=4)
	        print("Succesfully wrote all ranks to output file!")
	    except IOError as err:
	        sys.exit("I/O error: {0}".format(err))


	def PrintException():
	    '''
	    Print line number, source line and message of the exception that is
	    currently being handled.

	    Intended to be called from inside an ``except`` block. The line reported
	    is the one recorded in the first entry of the traceback returned by
	    sys.exc_info(). When no exception is active a short notice is printed
	    instead of failing on the missing traceback.
	    '''
	    _, exc_obj, tb = sys.exc_info()
	    if tb is None:
	        # Called outside an ``except`` block: there is nothing to look up.
	        print('No active exception to report')
	        return

	    frame = tb.tb_frame
	    lineno = tb.tb_lineno
	    filename = frame.f_code.co_filename
	    # Refresh the cache so a source file edited since import is re-read.
	    linecache.checkcache(filename)
	    line = linecache.getline(filename, lineno, frame.f_globals)
	    print('Exception in on line {}: {}: {}'.format(lineno, line.strip(), exc_obj))

	# Script entry: rank the ngram tree in ae.json and write it to json.json.
	init(
	    rNgram_file="ae.json",
	    rOutput_file="json.json",
	)