Skip to content

Instantly share code, notes, and snippets.

@seominjoon
Last active February 23, 2016 22:41
Show Gist options
  • Save seominjoon/e059b6fb51a3313fe800 to your computer and use it in GitHub Desktop.
Save seominjoon/e059b6fb51a3313fe800 to your computer and use it in GitHub Desktop.
Convert .bif file to .json file
#!/usr/bin/env python
"""
--------------------------------------------------------------------------------
This code is a modified version of:
https://github.com/eBay/bayesian-belief-networks/blob/master/bayesian/examples/bif/bif_parser.py.
NOTE: regex pattern of this parser is sensitive to spaces (will be fixed soon).
The output json file can be iterated with the following pseudo-code (d is the loaded json):
    variables = d['variables']
    edges = d['edges']
    for variable_name, domain in variables:
        # domain is the list of possible values that variable can take
    for variable_names, dist in edges:
        # variable_names[:-1] are the parents, and variable_names[-1] is the
        # current variable
        for variable_vals, prob in dist:
            # prob is the probability of variable_names[-1] = variable_vals[-1],
            # given variable_names[:-1] = variable_vals[:-1]
--------------------------------------------------------------------------------
"""
import re
import argparse
import json
__author__ = "Minjoon Seo"
__email__ = "seominjoon@gmail.com"
def get_args(argv=None):
    """Parse the command-line arguments of the converter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the original zero-argument call did.

    Returns:
        The ``argparse.Namespace`` holding ``bif_path`` and ``json_path``.
    """
    # The module docstring doubles as the --help description; the raw
    # formatter keeps its line breaks intact.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    parser.add_argument("bif_path", help="path to the input bif file.")
    parser.add_argument("json_path", help="path to the output json file.")
    return parser.parse_args(argv)
# Regex patterns for parsing. All of them are applied with ``match`` (anchored
# at the start of the line) and accept any amount of whitespace between
# tokens, so the indentation of the BIF file does not matter.
_VARIABLE_NAME_PATTERN = re.compile(r"variable\s+(.+?)\s*\{\s*$")
_VARIABLE_TYPE_PATTERN = re.compile(
    r"\s*type\s+discrete\s*\[\s*\d+\s*\]\s*\{(.+)\}\s*;\s*")
_PRIOR_HEADER_PATTERN = re.compile(r"probability\s*\(([^|]+)\)\s*\{\s*")
_PRIOR_TABLE_PATTERN = re.compile(r"\s*table\s+(.+);\s*")
_CONDITIONAL_HEADER_PATTERN = re.compile(
    r"probability\s*\((.+)\|(.+)\)\s*\{\s*")
_CONDITIONAL_ROW_PATTERN = re.compile(r"\s*\((.+)\)\s*(.+);\s*")
_LIST_SEPARATOR = re.compile(r"\s*,\s*")


def _split_list(text):
    """Split a comma-separated list, dropping whitespace around the items."""
    return _LIST_SEPARATOR.split(text.strip())


def _parse_probability(infile, header, variable_dict):
    """Parse one ``probability`` block and return its ``[names, dist]`` edge.

    ``header`` is the already-read first line of the block; the remaining
    lines, up to and including the closing brace, are consumed from
    ``infile``.
    """
    prior = _PRIOR_HEADER_PATTERN.match(header)
    if prior:
        # Prior probabilities: a single "table p1, p2, ...;" line.
        variable = prior.group(1).strip()
        line = infile.readline()
        table = _PRIOR_TABLE_PATTERN.match(line)
        if not table:
            raise Exception("Unrecognised probability declaration:\n" + line)
        infile.readline()  # closing "}"
        probs = [float(p) for p in _split_list(table.group(1))]
        # Built as real lists (not a lazy zip) so the result can be iterated
        # by the sanity check and still be serialised to JSON afterwards.
        dist = [[[value], prob]
                for value, prob in zip(variable_dict[variable], probs)]
        return [[variable], dist]

    conditional = _CONDITIONAL_HEADER_PATTERN.match(header)
    if not conditional:
        raise Exception("Unrecognised probability declaration:\n" + header)

    # Conditional probabilities: one "(parent values) p1, p2, ...;" row per
    # combination of parent values.
    variable = conditional.group(1).strip()
    parents = _split_list(conditional.group(2))
    dist = []
    while True:
        line = infile.readline()
        if not line:
            raise Exception(
                "Unexpected end of file in probability declaration:\n"
                + header)
        if line.strip() == "}":
            break
        row = _CONDITIONAL_ROW_PATTERN.match(line)
        if not row:
            raise Exception("Unrecognised probability declaration:\n" + line)
        given_values = _split_list(row.group(1))
        probs = [float(p) for p in _split_list(row.group(2))]
        for value, prob in zip(variable_dict[variable], probs):
            dist.append([given_values + [value], prob])
    # Parents first, the variable itself last.
    return [parents + [variable], dist]


def _parse_bif(infile):
    """Parse an open BIF text stream into ``(variables, variable_dict, edges)``."""
    variables = []  # [name, domain] pairs in declaration order
    variable_dict = {}  # name -> domain
    edges = []

    # The first two lines are skipped unconditionally (presumably the
    # "network ... {" / "}" header of the BIF file).
    infile.readline()
    infile.readline()

    while True:
        line = infile.readline()
        if not line:  # end of file
            break
        if line.startswith("variable"):
            # The domain is declared on the line following the header.
            name_match = _VARIABLE_NAME_PATTERN.match(line)
            type_match = _VARIABLE_TYPE_PATTERN.match(infile.readline())
            if not (name_match and type_match):
                raise Exception("Unrecognised variable declaration:\n" + line)
            name = name_match.group(1)
            domain = _split_list(type_match.group(1))
            variables.append([name, domain])
            variable_dict[name] = domain
            infile.readline()  # closing "}"
        elif line.startswith("probability"):
            edges.append(_parse_probability(infile, line, variable_dict))
    return variables, variable_dict, edges


def bif2json(args):
    """Convert a discrete Bayesian network from BIF to JSON.

    Reads ``args.bif_path`` and writes ``args.json_path`` as a JSON object
    with two keys:

    * ``variables``: list of ``[name, domain]`` pairs.
    * ``edges``: list of ``[variable_names, dist]`` pairs, where
      ``variable_names`` lists the parents followed by the variable itself and
      ``dist`` is a list of ``[variable_vals, prob]`` pairs.

    Args:
        args: Object with ``bif_path`` and ``json_path`` attributes, e.g. the
            namespace returned by ``get_args``.

    Raises:
        Exception: A variable or probability declaration could not be parsed.
        KeyError: A probability block names a variable that was not declared.
        AssertionError: The parsed distributions reference unknown variables
            or values outside a variable's domain.
    """
    # Text mode: the parser compares lines against str literals, which does
    # not work on the bytes a binary-mode file yields on Python 3.
    with open(args.bif_path, "r") as infile:
        variables, variable_dict, edges = _parse_bif(infile)

    # sanity check
    for variable_names, dist in edges:
        for variable_name in variable_names:
            assert variable_name in variable_dict, "%r not in %r" % (
                variable_name, list(variable_dict))
        for variable_vals, _prob in dist:
            for variable_name, variable_val in zip(variable_names,
                                                   variable_vals):
                assert variable_val in variable_dict[variable_name], (
                    "%r not in %r" % (variable_val,
                                      variable_dict[variable_name]))

    out = {'edges': edges, 'variables': variables}
    # json.dump emits str, so the output must be a text-mode file; the context
    # manager guarantees it is flushed and closed.
    with open(args.json_path, "w") as outfile:
        json.dump(out, outfile)
if __name__ == "__main__":
    # Script entry point: read the two paths from the command line, then
    # run the conversion with them.
    ARGS = get_args()
    bif2json(ARGS)
@seominjoon
Copy link
Author

Fixed. Regex is very badly written...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment