Created
October 27, 2015 20:33
-
-
Save fscottfoti/2cd34384e8aff39ad185 to your computer and use it in GitHub Desktop.
ipf, yea! see elsewhere for description of ipf.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# this should be fairly self-explanatory if you know IPF (iterative
# proportional fitting): seed_matrix is your best guess at the cell totals,
# col_marginals are the observed column marginals and row_marginals are the
# observed row marginals
def simple_ipf(seed_matrix, col_marginals, row_marginals, tolerance=1, cnt=0,
               max_iter=50):
    """Fit a seed matrix to row/column marginals by iterative proportional fitting.

    Each pass first rescales the columns so they sum to ``col_marginals``,
    then rescales the rows so they sum to ``row_marginals``.  The loop stops
    as soon as the total absolute error on the *other* axis drops below
    ``tolerance``, or when the pass counter reaches ``max_iter``.

    The input ``seed_matrix`` is never modified; a new matrix is returned.

    Args:
        seed_matrix: 2-D array of initial cell estimates.
        col_marginals: 1-D array of target column sums.
        row_marginals: 1-D array of target row sums.
        tolerance: maximum summed absolute deviation from the marginals at
            which the fit is considered converged.
        cnt: starting value of the pass counter (kept for backward
            compatibility with the former recursive implementation).
        max_iter: the fit gives up once the pass counter reaches this value;
            with ``cnt=0`` that permits ``max_iter + 1`` passes.

    Returns:
        The rescaled matrix.  If the pass limit is hit before convergence the
        last (row-normalized) matrix is returned as is.

    Raises:
        AssertionError: if the marginal totals differ by 5.0 or more, or if a
            scaling step fails to reproduce its target marginals (for example
            a non-zero marginal on an all-zero row or column).
    """
    assert np.absolute(row_marginals.sum() - col_marginals.sum()) < 5.0, \
        "row and column marginals must sum to (nearly) the same total"

    while True:
        # Normalize on columns.  A zero marginal over an all-zero column is
        # 0/0; force that ratio to 0 (mirroring the row step below) instead
        # of letting NaN spread through the matrix.
        with np.errstate(invalid="ignore"):
            ratios = col_marginals / seed_matrix.sum(axis=0)
        ratios[col_marginals == 0] = 0
        # Out-of-place multiply: leaves the caller's array untouched and also
        # works when the seed has an integer dtype.
        seed_matrix = seed_matrix * ratios
        assert np.absolute(
            col_marginals - seed_matrix.sum(axis=0)).sum() < .01, \
            "column scaling did not reproduce the column marginals"

        closeness = np.absolute(
            row_marginals - seed_matrix.sum(axis=1)).sum()
        if closeness < tolerance:
            return seed_matrix

        # Then normalize on rows, with the same zero-marginal guard.
        with np.errstate(invalid="ignore"):
            ratios = row_marginals / seed_matrix.sum(axis=1)
        ratios[row_marginals == 0] = 0
        # Column vector so each ratio scales one whole row.
        seed_matrix = seed_matrix * np.asarray(ratios).reshape(-1, 1)
        assert np.absolute(
            row_marginals - seed_matrix.sum(axis=1)).sum() < .01, \
            "row scaling did not reproduce the row marginals"

        closeness = np.absolute(
            col_marginals - seed_matrix.sum(axis=0)).sum()
        if closeness < tolerance:
            return seed_matrix

        # Not converged: give up once the pass limit has been reached.
        if cnt >= max_iter:
            return seed_matrix
        cnt += 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment