Skip to content

Instantly share code, notes, and snippets.

@mwatts15
Created May 21, 2016 15:23
Show Gist options
  • Save mwatts15/88845ce75249235238dd09beffc9aa87 to your computer and use it in GitHub Desktop.
Save mwatts15/88845ce75249235238dd09beffc9aa87 to your computer and use it in GitHub Desktop.
Script for translating edges
from __future__ import print_function

import csv
import sys

import xlrd
# The workbook is opened once, at import time; main() reads its rows from
# the module-level `sheet`.
wb = xlrd.open_workbook(
    './Differentiation Tree Dataset/differentiation-tree-all-variables.1.xlsx')
sheet = wb.sheet_by_name('RelativeSize')

# Column indices
PARENT, CHILDA, CHILDB, LARGER, ASYMMETRIC = 1, 2, 3, 4, 5

# Number of leading sheet rows that g() and main() skip before the data
# (presumably a header row -- confirm against the workbook).
ROW_OFFSET = 1

# Edge labels written by main():
#   LARGE_LABEL / SMALL_LABEL -- the child named by the LARGER column gets
#                                LARGE_LABEL, the other child SMALL_LABEL
#   ASYM_LABEL                -- the single child of a row flagged ASYMMETRIC
#   SAME_LABEL                -- both children when LARGER is 'X'
LARGE_LABEL, SMALL_LABEL = 'L', 'S'
ASYM_LABEL = 'A'
SAME_LABEL = 'Z'
# Ids already handed out by idgen(); consulted to detect repeats.
existing_idents = set()


def idgen(x):
    """Return an integer id for the fields in *x* and record it as issued.

    The fields are converted with str(), joined with '|' and passed to the
    built-in hash().

    Args:
        x: iterable of values identifying one record.

    Returns:
        The hash of the joined fields.

    Raises:
        ValueError: if that id was already issued, i.e. the same fields were
            passed before or two different inputs hash to the same value.
    """
    z = '|'.join(str(y) for y in x)
    s = hash(z)
    if s in existing_idents:
        raise ValueError("Possible identity hash collision with "
                         "data='{}' and hash={}".format(z, s))
    # The value that was checked is exactly the value stored and returned;
    # altering it in between would let repeats slip past the check.
    existing_idents.add(s)
    # NOTE: on Python 3, str hashes are salted per process unless
    # PYTHONHASHSEED is fixed, so ids are only stable within one run.
    return s
def g(sheet, row, idx):
    """Return the value of column *idx* on data row *row* of *sheet*.

    *row* counts from the first data row; ROW_OFFSET is added to turn it
    into the row index passed to sheet.cell_value().
    """
    sheet_row = ROW_OFFSET + row
    return sheet.cell_value(sheet_row, idx)
def p(ident, s, t, label):
    """Write one CSV record ``ident,s,t,label`` to standard output.

    The record goes through the csv module, so a field that contains a
    comma, a quote or a newline is quoted instead of silently shifting the
    columns. Fields without such characters are written unchanged, and the
    record ends with a single '\\n'.

    Args:
        ident: record id (first column).
        s: source node (second column).
        t: target node (third column).
        label: edge label (fourth column).
    """
    # sys.stdout is looked up on every call so redirected output is honoured.
    writer = csv.writer(sys.stdout, lineterminator='\n')
    writer.writerow((ident, s, t, label))
def pi(s, t, label):
    """Print the edge *s* -> *t* with *label*, prefixed by a generated id.

    The id comes from idgen() applied to the (s, t, label) triple, and the
    record is emitted through p().
    """
    edge_id = idgen((s, t, label))
    p(edge_id, s, t, label)
def main():
    """Print one CSV edge per parent/child pair found in `sheet`.

    A header record is printed first. Then, for every data row:

    * ASYMMETRIC == 1: exactly one child may be present; it is printed with
      ASYM_LABEL. A row with no child prints nothing.
    * otherwise, LARGER != 'X': both children must be present; the child
      named by LARGER ('A' or 'B') gets LARGE_LABEL, the other SMALL_LABEL.
    * otherwise (LARGER == 'X'): if both children are present they are
      printed with SAME_LABEL; else nothing is printed.

    Raises:
        ValueError: if an asymmetric row has two children, or a row with a
            larger child recorded is missing one or both children.
    """
    p('id', 'source', 'target', 'label')
    for row_num in range(sheet.nrows - ROW_OFFSET):
        larger = g(sheet, row_num, LARGER)
        is_asymmetric = (g(sheet, row_num, ASYMMETRIC) == 1)
        parent = g(sheet, row_num, PARENT)
        a = g(sheet, row_num, CHILDA)
        b = g(sheet, row_num, CHILDB)
        # Row number used in error messages: the sheet row index that g()
        # reads, plus one (presumably to match the 1-based numbering a
        # spreadsheet application shows).
        sheet_row = row_num + ROW_OFFSET + 1

        # Emptiness is tested by truthiness rather than len() so that a
        # numeric cell value does not blow up with a TypeError.
        if is_asymmetric:
            if a and b:
                raise ValueError(
                    "Asymmetrical division has 2 children on row {}".format(
                        sheet_row))
            only_child = a or b
            if only_child:
                pi(parent, only_child, ASYM_LABEL)
        elif larger != 'X':
            if not a or not b:
                raise ValueError(
                    "One or both child cells are empty"
                    " for a symmetrical division on row {}."
                    " Values are {}".format(
                        sheet_row,
                        (parent, a, b, larger, is_asymmetric)))
            pi(parent, a, LARGE_LABEL if larger == 'A' else SMALL_LABEL)
            pi(parent, b, LARGE_LABEL if larger == 'B' else SMALL_LABEL)
        elif a and b:
            pi(parent, a, SAME_LABEL)
            pi(parent, b, SAME_LABEL)
# Run the translation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment