# seominjoon/bif2json.py

## bif2json.py
#!/usr/bin/env python
"""
--------------------------------------------------------------------------------
This code is a modified version of:
https://github.com/eBay/bayesian-belief-networks/blob/master/bayesian/examples/bif/bif_parser.py.

NOTE: regex pattern of this parser is sensitive to spaces (will be fixed soon).
The output json file can be iterated with the following pseudo-code (d is the loaded json):

variables = d['variables']
edges = d['edges']

for variable_name, domain in variables:
    # domain is the list of possible values that variable can take

for variable_names, dist in edges:
    # variable_names[:-1] are the parents, and variable_names[-1] is the current
    for variable_vals, prob in dist:
        # prob is the probability of variable_names[-1] = variable_vals[-1],
        # given variable_names[:-1] = variable_vals[:-1]
--------------------------------------------------------------------------------
"""

import re
import argparse
import json

__author__ = "Minjoon Seo"
__email__ = "seominjoon@gmail.com"


def get_args():
    """Parse the command line.

    Two positional arguments are expected: ``bif_path`` (input BIF file) and
    ``json_path`` (output JSON file).  The module docstring is shown verbatim
    as the help description.

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``.
    """
    positional_help = (
        ("bif_path", "path to the input bif file."),
        ("json_path", "path to the output json file."),
    )
    cli = argparse.ArgumentParser(
        description=__doc__,
        # Raw formatter keeps the pseudo-code layout of the module docstring.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for arg_name, help_text in positional_help:
        cli.add_argument(arg_name, help=help_text)
    return cli.parse_args()


def bif2json(args):
    """Convert the BIF network at ``args.bif_path`` to JSON at ``args.json_path``.

    The JSON object written has two keys:

    * ``'variables'``: a list of ``[name, domain]`` pairs in declaration
      order, where ``domain`` is the list of value names of that variable.
    * ``'edges'``: one ``[variable_names, dist]`` entry per ``probability``
      block.  ``variable_names`` lists the parents followed by the child, and
      ``dist`` is a list of ``[variable_vals, prob]`` rows aligned with it.

    Lines that do not start with ``variable`` or ``probability`` (for example
    the ``network`` header) are ignored.  A variable must be declared before
    the probability block that uses it as the child.

    Args:
        args: object with ``bif_path`` and ``json_path`` attributes, such as
            the namespace returned by ``get_args()``.

    Raises:
        ValueError: if a variable or probability block cannot be parsed.
        KeyError: if a probability block names an undeclared child variable.
        AssertionError: if the sanity check finds an unknown variable or an
            unknown value in a distribution.
    """
    def split_commas(text):
        # Comma-separated list -> items, tolerant of spaces around commas.
        return re.split(r"\s*,\s*", text.strip())

    # Whitespace-tolerant patterns; each still accepts the canonical layout
    # ("  type discrete [ 2 ] { yes, no };", "  table 0.1, 0.9;", ...).
    variable_header = re.compile(r"variable\s+(.+?)\s*\{\s*$")
    variable_domain = re.compile(
        r"\s*type\s+discrete\s*\[\s*\d+\s*\]\s*\{\s*(.+?)\s*\}\s*;\s*")
    prior_header = re.compile(r"probability\s*\(\s*([^|]+?)\s*\)\s*\{\s*")
    prior_table = re.compile(r"\s*table\s+(.+);\s*")
    conditional_header = re.compile(r"probability\s*\((.+)\|(.+)\)\s*\{\s*")
    conditional_row = re.compile(r"\s*\((.+)\)\s*(.+);\s*")

    variables = []  # [name, domain] pairs, in declaration order
    variable_dict = {}  # name -> domain
    edges = []

    # Text mode so lines are str on both Python 2 and 3; "with" closes the file.
    with open(args.bif_path, "r") as infile:
        for line in infile:

            # Variable declaration: header line, domain line, closing brace.
            if line.startswith("variable"):
                header = variable_header.match(line)
                domain = variable_domain.match(next(infile, ""))
                if not (header and domain):
                    raise ValueError(
                        "Unrecognised variable declaration:\n" + line)
                name = header.group(1)
                values = split_commas(domain.group(1))
                variables.append([name, values])
                variable_dict[name] = values
                next(infile, "")  # }

            # Probability distribution
            elif line.startswith("probability"):
                prior = prior_header.match(line)
                if prior:
                    # Prior probabilities: a single "table" row.
                    variable = prior.group(1)
                    table_line = next(infile, "")
                    table = prior_table.match(table_line)
                    if not table:
                        raise ValueError(
                            "Unrecognised probability table:\n" + table_line)
                    next(infile, "")  # }
                    probs = [float(p) for p in split_commas(table.group(1))]
                    # Materialise as lists: a lazy zip cannot be serialised
                    # by json and would be consumed by the sanity check.
                    dist = [[[value], prob] for value, prob
                            in zip(variable_dict[variable], probs)]
                    edges.append([[variable], dist])
                    continue

                conditional = conditional_header.match(line)
                if not conditional:
                    raise ValueError(
                        "Unrecognised probability declaration:\n" + line)

                # Conditional probabilities: one row per parent assignment.
                variable = conditional.group(1).strip()
                parents = split_commas(conditional.group(2))
                rows = []
                while True:
                    row_line = next(infile, "")
                    if not row_line:
                        # EOF before the closing brace.
                        raise ValueError(
                            "Unterminated probability block for %r" % variable)
                    stripped = row_line.strip()
                    if stripped == "}":
                        break
                    if not stripped:
                        continue
                    row = conditional_row.match(row_line)
                    if not row:
                        raise ValueError(
                            "Unrecognised probability row:\n" + row_line)
                    parent_values = split_commas(row.group(1))
                    probs = [float(p) for p in split_commas(row.group(2))]
                    for value, prob in zip(variable_dict[variable], probs):
                        rows.append([parent_values + [value], prob])

                edges.append([parents + [variable], rows])

    # Sanity check.  Raised explicitly (not via "assert") so that it still
    # runs under "python -O"; the exception type is unchanged.
    for variable_names, dist in edges:
        for variable_name in variable_names:
            if variable_name not in variable_dict:
                raise AssertionError(
                    "%r not in %r" % (variable_name, list(variable_dict)))
        for variable_vals, _prob in dist:
            for variable_name, variable_val in zip(variable_names, variable_vals):
                if variable_val not in variable_dict[variable_name]:
                    raise AssertionError(
                        "%r not in %r" % (variable_val, variable_dict[variable_name]))

    out = {'edges': edges, 'variables': variables}
    # json.dump writes str, so the output must be opened in text mode.
    with open(args.json_path, "w") as outfile:
        json.dump(out, outfile)

if __name__ == "__main__":
    # Script entry point: parse the command line and run the conversion.
    bif2json(get_args())
	#!/usr/bin/env python
	"""
	--------------------------------------------------------------------------------
	This code is a modified version of:
	https://github.com/eBay/bayesian-belief-networks/blob/master/bayesian/examples/bif/bif_parser.py.

	NOTE: regex pattern of this parser is sensitive to spaces (will be fixed soon).
	The output json file can be iterated with the following pseudo-code (d is the loaded json):

	variables = d['variables']
	edges = d['edges']

	for variable_name, domain in variables:
	    # domain is the list of possible values that variable can take

	for variable_names, dist in edges:
	    # variable_names[:-1] are the parents, and variable_names[-1] is the current
	    for variable_vals, prob in dist:
	        # prob is the probability of variable_names[-1] = variable_vals[-1],
	        # given variable_names[:-1] = variable_vals[:-1]
	--------------------------------------------------------------------------------
	"""

	import re
	import argparse
	import json

	__author__ = "Minjoon Seo"
	__email__ = "seominjoon@gmail.com"


	def get_args():
	    """Parse the command line.

	    Two positional arguments are expected: ``bif_path`` (input BIF file)
	    and ``json_path`` (output JSON file).  ``__doc__`` is used verbatim
	    as the help description.

	    Returns:
	        The ``argparse.Namespace`` produced by parsing ``sys.argv``.
	    """
	    # Raw formatter keeps the layout of the description text as written.
	    parser = argparse.ArgumentParser(
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        description=__doc__)
	    parser.add_argument("bif_path", help="path to the input bif file.")
	    parser.add_argument("json_path", help="path to the output json file.")
	    return parser.parse_args()


	def bif2json(args):
	    """Convert the BIF network at ``args.bif_path`` to JSON at ``args.json_path``.

	    The JSON object written has two keys:

	    * ``'variables'``: a list of ``[name, domain]`` pairs in declaration
	      order, where ``domain`` is the list of value names of that variable.
	    * ``'edges'``: one ``[variable_names, dist]`` entry per ``probability``
	      block.  ``variable_names`` lists the parents followed by the child,
	      and ``dist`` is a list of ``[variable_vals, prob]`` rows aligned
	      with it.

	    Lines that do not start with ``variable`` or ``probability`` (for
	    example the ``network`` header) are ignored.  A variable must be
	    declared before the probability block that uses it as the child.

	    Args:
	        args: object with ``bif_path`` and ``json_path`` attributes, such
	            as the namespace returned by ``get_args()``.

	    Raises:
	        ValueError: if a variable or probability block cannot be parsed.
	        KeyError: if a probability block names an undeclared child variable.
	        AssertionError: if the sanity check finds an unknown variable or
	            an unknown value in a distribution.
	    """
	    def split_commas(text):
	        # Comma-separated list -> items, tolerant of spaces around commas.
	        return re.split(r"\s*,\s*", text.strip())

	    # Whitespace-tolerant patterns; each still accepts the canonical layout
	    # ("  type discrete [ 2 ] { yes, no };", "  table 0.1, 0.9;", ...).
	    variable_header = re.compile(r"variable\s+(.+?)\s*\{\s*$")
	    variable_domain = re.compile(
	        r"\s*type\s+discrete\s*\[\s*\d+\s*\]\s*\{\s*(.+?)\s*\}\s*;\s*")
	    prior_header = re.compile(r"probability\s*\(\s*([^|]+?)\s*\)\s*\{\s*")
	    prior_table = re.compile(r"\s*table\s+(.+);\s*")
	    conditional_header = re.compile(r"probability\s*\((.+)\|(.+)\)\s*\{\s*")
	    conditional_row = re.compile(r"\s*\((.+)\)\s*(.+);\s*")

	    variables = []  # [name, domain] pairs, in declaration order
	    variable_dict = {}  # name -> domain
	    edges = []

	    # Text mode so lines are str on both Python 2 and 3; "with" closes the file.
	    with open(args.bif_path, "r") as infile:
	        for line in infile:

	            # Variable declaration: header line, domain line, closing brace.
	            if line.startswith("variable"):
	                header = variable_header.match(line)
	                domain = variable_domain.match(next(infile, ""))
	                if not (header and domain):
	                    raise ValueError(
	                        "Unrecognised variable declaration:\n" + line)
	                name = header.group(1)
	                values = split_commas(domain.group(1))
	                variables.append([name, values])
	                variable_dict[name] = values
	                next(infile, "")  # }

	            # Probability distribution
	            elif line.startswith("probability"):
	                prior = prior_header.match(line)
	                if prior:
	                    # Prior probabilities: a single "table" row.
	                    variable = prior.group(1)
	                    table_line = next(infile, "")
	                    table = prior_table.match(table_line)
	                    if not table:
	                        raise ValueError(
	                            "Unrecognised probability table:\n" + table_line)
	                    next(infile, "")  # }
	                    probs = [float(p) for p in split_commas(table.group(1))]
	                    # Materialise as lists: a lazy zip cannot be serialised
	                    # by json and would be consumed by the sanity check.
	                    dist = [[[value], prob] for value, prob
	                            in zip(variable_dict[variable], probs)]
	                    edges.append([[variable], dist])
	                    continue

	                conditional = conditional_header.match(line)
	                if not conditional:
	                    raise ValueError(
	                        "Unrecognised probability declaration:\n" + line)

	                # Conditional probabilities: one row per parent assignment.
	                variable = conditional.group(1).strip()
	                parents = split_commas(conditional.group(2))
	                rows = []
	                while True:
	                    row_line = next(infile, "")
	                    if not row_line:
	                        # EOF before the closing brace.
	                        raise ValueError(
	                            "Unterminated probability block for %r" % variable)
	                    stripped = row_line.strip()
	                    if stripped == "}":
	                        break
	                    if not stripped:
	                        continue
	                    row = conditional_row.match(row_line)
	                    if not row:
	                        raise ValueError(
	                            "Unrecognised probability row:\n" + row_line)
	                    parent_values = split_commas(row.group(1))
	                    probs = [float(p) for p in split_commas(row.group(2))]
	                    for value, prob in zip(variable_dict[variable], probs):
	                        rows.append([parent_values + [value], prob])

	                edges.append([parents + [variable], rows])

	    # Sanity check.  Raised explicitly (not via "assert") so that it still
	    # runs under "python -O"; the exception type is unchanged.
	    for variable_names, dist in edges:
	        for variable_name in variable_names:
	            if variable_name not in variable_dict:
	                raise AssertionError(
	                    "%r not in %r" % (variable_name, list(variable_dict)))
	        for variable_vals, _prob in dist:
	            for variable_name, variable_val in zip(variable_names, variable_vals):
	                if variable_val not in variable_dict[variable_name]:
	                    raise AssertionError(
	                        "%r not in %r" % (variable_val, variable_dict[variable_name]))

	    out = {'edges': edges, 'variables': variables}
	    # json.dump writes str, so the output must be opened in text mode.
	    with open(args.json_path, "w") as outfile:
	        json.dump(out, outfile)

	if __name__ == "__main__":
	    # Script entry point: parse the command line and run the conversion.
	    ARGS = get_args()
	    bif2json(ARGS)