Skip to content

Instantly share code, notes, and snippets.

@arq5x
Last active November 13, 2018 15:25
Show Gist options
  • Save arq5x/5408712 to your computer and use it in GitHub Desktop.
Save arq5x/5408712 to your computer and use it in GitHub Desktop.
Convert Grantham Amino Acid matrix into Python dict.
#!/usr/bin/env python
import sys
import pprint
def make_grantham_dict(grantham_mat_file):
    """
    Load a Grantham amino-acid distance matrix file into a dict.

    Citation: http://www.ncbi.nlm.nih.gov/pubmed/4843792
    Provenance: http://www.genome.jp/dbget-bin/www_bget?aaindex:GRAR740104

    The file is a whitespace-delimited (tabs and/or spaces) matrix. The first
    row is a header whose first field is a placeholder (e.g. ".") followed by
    the "to" amino acids; every other row starts with the "from" amino acid
    followed by one score per header column:

    . A R N D C Q E G H I L K M F P S T W Y V
    A 0 112 111 126 195 91 107 60 86 94 96 106 84 113 27 99 58 148 112 64
    R 112 0 86 96 180 43 54 125 29 97 102 26 91 97 103 110 71 101 77 96
    N 111 86 0 23 139 46 42 80 68 149 153 94 142 158 91 46 65 174 143 133
    D 126 96 23 0 154 61 45 94 81 168 172 101 160 177 108 65 85 181 160 152
    C 195 180 139 154 0 154 170 159 174 198 198 202 196 205 169 112 149 215 194 192
    Q 91 43 46 61 154 0 29 87 24 109 113 53 101 116 76 68 42 130 99 96
    E 107 54 42 45 170 29 0 98 40 134 138 56 126 140 93 80 65 152 122 121
    G 60 125 80 94 159 87 98 0 98 135 138 127 127 153 42 56 59 184 147 109
    H 86 29 68 81 174 24 40 98 0 94 99 32 87 100 77 89 47 115 83 84
    I 94 97 149 168 198 109 134 135 94 0 5 102 10 21 95 142 89 61 33 29
    L 96 102 153 172 198 113 138 138 99 5 0 107 15 22 98 145 92 61 36 32
    K 106 26 94 101 202 53 56 127 32 102 107 0 95 102 103 121 78 110 85 97
    M 84 91 142 160 196 101 126 127 87 10 15 95 0 28 87 135 81 67 36 21
    F 113 97 158 177 205 116 140 153 100 21 22 102 28 0 114 155 103 40 22 50
    P 27 103 91 108 169 76 93 42 77 95 98 103 87 114 0 74 38 147 110 68
    S 99 110 46 65 112 68 80 56 89 142 145 121 135 155 74 0 58 177 144 124
    T 58 71 65 85 149 42 65 59 47 89 92 78 81 103 38 58 0 128 92 69
    W 148 101 174 181 215 130 152 184 115 61 61 110 67 40 147 177 128 0 37 88
    Y 112 77 143 160 194 99 122 147 83 33 36 85 36 22 110 144 92 37 0 55
    V 64 96 133 152 192 96 121 109 84 29 32 97 21 50 68 124 69 88 55 0

    Args:
        grantham_mat_file: path to the matrix file described above.

    Returns:
        A dict mapping (from_aa, to_aa) tuples to the score exactly as it
        appears in the file, i.e. as a string (convert with int() if a
        numeric value is needed). An empty file yields an empty dict.

    Raises:
        KeyError: if a data row has more columns than the header row.
    """
    # The context manager guarantees the handle is closed even on error.
    with open(grantham_mat_file) as f:
        # next(f, "") works on both Python 2 and 3 (f.next() is Python 2
        # only) and gives an empty header instead of leaking StopIteration
        # when the file is empty.
        # Splitting on any whitespace keeps the column labels aligned even
        # when the header mixes spaces and tabs (e.g. ". A<TAB>R...").
        header = next(f, "").split()
        idx_to_aa = dict(enumerate(header))
        grantham_dict = {}
        for line in f:
            fields = line.split()
            if not fields:
                # Blank (or whitespace-only) lines carry no data.
                continue
            from_aa = fields[0]
            # Column 0 is the row label, so scores start at index 1, which
            # lines up with the header because of its leading placeholder.
            for idx, score in enumerate(fields[1:], 1):
                grantham_dict[(from_aa, idx_to_aa[idx])] = score
    return grantham_dict
if __name__ == "__main__":
    # Script entry point: pretty-print the dict built from the matrix file
    # named by the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a usage message (written to stderr, exit status 1)
        # rather than an IndexError traceback when the path is missing.
        sys.exit("usage: %s <grantham_matrix_file>" % sys.argv[0])
    grantham_dict = make_grantham_dict(sys.argv[1])
    pp = pprint.PrettyPrinter(indent=2)
    pp.pprint(grantham_dict)
. A R N D C Q E G H I L K M F P S T W Y V
A 0 112 111 126 195 91 107 60 86 94 96 106 84 113 27 99 58 148 112 64
R 112 0 86 96 180 43 54 125 29 97 102 26 91 97 103 110 71 101 77 96
N 111 86 0 23 139 46 42 80 68 149 153 94 142 158 91 46 65 174 143 133
D 126 96 23 0 154 61 45 94 81 168 172 101 160 177 108 65 85 181 160 152
C 195 180 139 154 0 154 170 159 174 198 198 202 196 205 169 112 149 215 194 192
Q 91 43 46 61 154 0 29 87 24 109 113 53 101 116 76 68 42 130 99 96
E 107 54 42 45 170 29 0 98 40 134 138 56 126 140 93 80 65 152 122 121
G 60 125 80 94 159 87 98 0 98 135 138 127 127 153 42 56 59 184 147 109
H 86 29 68 81 174 24 40 98 0 94 99 32 87 100 77 89 47 115 83 84
I 94 97 149 168 198 109 134 135 94 0 5 102 10 21 95 142 89 61 33 29
L 96 102 153 172 198 113 138 138 99 5 0 107 15 22 98 145 92 61 36 32
K 106 26 94 101 202 53 56 127 32 102 107 0 95 102 103 121 78 110 85 97
M 84 91 142 160 196 101 126 127 87 10 15 95 0 28 87 135 81 67 36 21
F 113 97 158 177 205 116 140 153 100 21 22 102 28 0 114 155 103 40 22 50
P 27 103 91 108 169 76 93 42 77 95 98 103 87 114 0 74 38 147 110 68
S 99 110 46 65 112 68 80 56 89 142 145 121 135 155 74 0 58 177 144 124
T 58 71 65 85 149 42 65 59 47 89 92 78 81 103 38 58 0 128 92 69
W 148 101 174 181 215 130 152 184 115 61 61 110 67 40 147 177 128 0 37 88
Y 112 77 143 160 194 99 122 147 83 33 36 85 36 22 110 144 92 37 0 55
V 64 96 133 152 192 96 121 109 84 29 32 97 21 50 68 124 69 88 55 0
$ python grantham-dict.py grantham.matrix.txt
{ ('A', 'A'): '0',
('A', 'C'): '195',
('A', 'D'): '126',
('A', 'E'): '107',
('A', 'F'): '113',
('A', 'G'): '60',
('A', 'H'): '86',
('A', 'I'): '94',
('A', 'K'): '106',
('A', 'L'): '96',
('A', 'M'): '84',
('A', 'N'): '111',
('A', 'P'): '27',
('A', 'Q'): '91',
('A', 'R'): '112',
('A', 'S'): '99',
('A', 'T'): '58',
('A', 'V'): '64',
('A', 'W'): '148',
('A', 'Y'): '112',
('C', 'A'): '195',
('C', 'C'): '0',
('C', 'D'): '154',
('C', 'E'): '170',
('C', 'F'): '205',
('C', 'G'): '159',
('C', 'H'): '174',
('C', 'I'): '198',
('C', 'K'): '202',
('C', 'L'): '198',
('C', 'M'): '196',
('C', 'N'): '139',
('C', 'P'): '169',
('C', 'Q'): '154',
('C', 'R'): '180',
('C', 'S'): '112',
('C', 'T'): '149',
('C', 'V'): '192',
('C', 'W'): '215',
('C', 'Y'): '194',
('D', 'A'): '126',
('D', 'C'): '154',
('D', 'D'): '0',
('D', 'E'): '45',
('D', 'F'): '177',
('D', 'G'): '94',
('D', 'H'): '81',
('D', 'I'): '168',
('D', 'K'): '101',
('D', 'L'): '172',
('D', 'M'): '160',
('D', 'N'): '23',
('D', 'P'): '108',
('D', 'Q'): '61',
('D', 'R'): '96',
('D', 'S'): '65',
('D', 'T'): '85',
('D', 'V'): '152',
('D', 'W'): '181',
('D', 'Y'): '160',
('E', 'A'): '107',
('E', 'C'): '170',
('E', 'D'): '45',
('E', 'E'): '0',
('E', 'F'): '140',
('E', 'G'): '98',
('E', 'H'): '40',
('E', 'I'): '134',
('E', 'K'): '56',
('E', 'L'): '138',
('E', 'M'): '126',
('E', 'N'): '42',
('E', 'P'): '93',
('E', 'Q'): '29',
('E', 'R'): '54',
('E', 'S'): '80',
('E', 'T'): '65',
('E', 'V'): '121',
('E', 'W'): '152',
('E', 'Y'): '122',
('F', 'A'): '113',
('F', 'C'): '205',
('F', 'D'): '177',
('F', 'E'): '140',
('F', 'F'): '0',
('F', 'G'): '153',
('F', 'H'): '100',
('F', 'I'): '21',
('F', 'K'): '102',
('F', 'L'): '22',
('F', 'M'): '28',
('F', 'N'): '158',
('F', 'P'): '114',
('F', 'Q'): '116',
('F', 'R'): '97',
('F', 'S'): '155',
('F', 'T'): '103',
('F', 'V'): '50',
('F', 'W'): '40',
('F', 'Y'): '22',
('G', 'A'): '60',
('G', 'C'): '159',
('G', 'D'): '94',
('G', 'E'): '98',
('G', 'F'): '153',
('G', 'G'): '0',
('G', 'H'): '98',
('G', 'I'): '135',
('G', 'K'): '127',
('G', 'L'): '138',
('G', 'M'): '127',
('G', 'N'): '80',
('G', 'P'): '42',
('G', 'Q'): '87',
('G', 'R'): '125',
('G', 'S'): '56',
('G', 'T'): '59',
('G', 'V'): '109',
('G', 'W'): '184',
('G', 'Y'): '147',
('H', 'A'): '86',
('H', 'C'): '174',
('H', 'D'): '81',
('H', 'E'): '40',
('H', 'F'): '100',
('H', 'G'): '98',
('H', 'H'): '0',
('H', 'I'): '94',
('H', 'K'): '32',
('H', 'L'): '99',
('H', 'M'): '87',
('H', 'N'): '68',
('H', 'P'): '77',
('H', 'Q'): '24',
('H', 'R'): '29',
('H', 'S'): '89',
('H', 'T'): '47',
('H', 'V'): '84',
('H', 'W'): '115',
('H', 'Y'): '83',
('I', 'A'): '94',
('I', 'C'): '198',
('I', 'D'): '168',
('I', 'E'): '134',
('I', 'F'): '21',
('I', 'G'): '135',
('I', 'H'): '94',
('I', 'I'): '0',
('I', 'K'): '102',
('I', 'L'): '5',
('I', 'M'): '10',
('I', 'N'): '149',
('I', 'P'): '95',
('I', 'Q'): '109',
('I', 'R'): '97',
('I', 'S'): '142',
('I', 'T'): '89',
('I', 'V'): '29',
('I', 'W'): '61',
('I', 'Y'): '33',
('K', 'A'): '106',
('K', 'C'): '202',
('K', 'D'): '101',
('K', 'E'): '56',
('K', 'F'): '102',
('K', 'G'): '127',
('K', 'H'): '32',
('K', 'I'): '102',
('K', 'K'): '0',
('K', 'L'): '107',
('K', 'M'): '95',
('K', 'N'): '94',
('K', 'P'): '103',
('K', 'Q'): '53',
('K', 'R'): '26',
('K', 'S'): '121',
('K', 'T'): '78',
('K', 'V'): '97',
('K', 'W'): '110',
('K', 'Y'): '85',
('L', 'A'): '96',
('L', 'C'): '198',
('L', 'D'): '172',
('L', 'E'): '138',
('L', 'F'): '22',
('L', 'G'): '138',
('L', 'H'): '99',
('L', 'I'): '5',
('L', 'K'): '107',
('L', 'L'): '0',
('L', 'M'): '15',
('L', 'N'): '153',
('L', 'P'): '98',
('L', 'Q'): '113',
('L', 'R'): '102',
('L', 'S'): '145',
('L', 'T'): '92',
('L', 'V'): '32',
('L', 'W'): '61',
('L', 'Y'): '36',
('M', 'A'): '84',
('M', 'C'): '196',
('M', 'D'): '160',
('M', 'E'): '126',
('M', 'F'): '28',
('M', 'G'): '127',
('M', 'H'): '87',
('M', 'I'): '10',
('M', 'K'): '95',
('M', 'L'): '15',
('M', 'M'): '0',
('M', 'N'): '142',
('M', 'P'): '87',
('M', 'Q'): '101',
('M', 'R'): '91',
('M', 'S'): '135',
('M', 'T'): '81',
('M', 'V'): '21',
('M', 'W'): '67',
('M', 'Y'): '36',
('N', 'A'): '111',
('N', 'C'): '139',
('N', 'D'): '23',
('N', 'E'): '42',
('N', 'F'): '158',
('N', 'G'): '80',
('N', 'H'): '68',
('N', 'I'): '149',
('N', 'K'): '94',
('N', 'L'): '153',
('N', 'M'): '142',
('N', 'N'): '0',
('N', 'P'): '91',
('N', 'Q'): '46',
('N', 'R'): '86',
('N', 'S'): '46',
('N', 'T'): '65',
('N', 'V'): '133',
('N', 'W'): '174',
('N', 'Y'): '143',
('P', 'A'): '27',
('P', 'C'): '169',
('P', 'D'): '108',
('P', 'E'): '93',
('P', 'F'): '114',
('P', 'G'): '42',
('P', 'H'): '77',
('P', 'I'): '95',
('P', 'K'): '103',
('P', 'L'): '98',
('P', 'M'): '87',
('P', 'N'): '91',
('P', 'P'): '0',
('P', 'Q'): '76',
('P', 'R'): '103',
('P', 'S'): '74',
('P', 'T'): '38',
('P', 'V'): '68',
('P', 'W'): '147',
('P', 'Y'): '110',
('Q', 'A'): '91',
('Q', 'C'): '154',
('Q', 'D'): '61',
('Q', 'E'): '29',
('Q', 'F'): '116',
('Q', 'G'): '87',
('Q', 'H'): '24',
('Q', 'I'): '109',
('Q', 'K'): '53',
('Q', 'L'): '113',
('Q', 'M'): '101',
('Q', 'N'): '46',
('Q', 'P'): '76',
('Q', 'Q'): '0',
('Q', 'R'): '43',
('Q', 'S'): '68',
('Q', 'T'): '42',
('Q', 'V'): '96',
('Q', 'W'): '130',
('Q', 'Y'): '99',
('R', 'A'): '112',
('R', 'C'): '180',
('R', 'D'): '96',
('R', 'E'): '54',
('R', 'F'): '97',
('R', 'G'): '125',
('R', 'H'): '29',
('R', 'I'): '97',
('R', 'K'): '26',
('R', 'L'): '102',
('R', 'M'): '91',
('R', 'N'): '86',
('R', 'P'): '103',
('R', 'Q'): '43',
('R', 'R'): '0',
('R', 'S'): '110',
('R', 'T'): '71',
('R', 'V'): '96',
('R', 'W'): '101',
('R', 'Y'): '77',
('S', 'A'): '99',
('S', 'C'): '112',
('S', 'D'): '65',
('S', 'E'): '80',
('S', 'F'): '155',
('S', 'G'): '56',
('S', 'H'): '89',
('S', 'I'): '142',
('S', 'K'): '121',
('S', 'L'): '145',
('S', 'M'): '135',
('S', 'N'): '46',
('S', 'P'): '74',
('S', 'Q'): '68',
('S', 'R'): '110',
('S', 'S'): '0',
('S', 'T'): '58',
('S', 'V'): '124',
('S', 'W'): '177',
('S', 'Y'): '144',
('T', 'A'): '58',
('T', 'C'): '149',
('T', 'D'): '85',
('T', 'E'): '65',
('T', 'F'): '103',
('T', 'G'): '59',
('T', 'H'): '47',
('T', 'I'): '89',
('T', 'K'): '78',
('T', 'L'): '92',
('T', 'M'): '81',
('T', 'N'): '65',
('T', 'P'): '38',
('T', 'Q'): '42',
('T', 'R'): '71',
('T', 'S'): '58',
('T', 'T'): '0',
('T', 'V'): '69',
('T', 'W'): '128',
('T', 'Y'): '92',
('V', 'A'): '64',
('V', 'C'): '192',
('V', 'D'): '152',
('V', 'E'): '121',
('V', 'F'): '50',
('V', 'G'): '109',
('V', 'H'): '84',
('V', 'I'): '29',
('V', 'K'): '97',
('V', 'L'): '32',
('V', 'M'): '21',
('V', 'N'): '133',
('V', 'P'): '68',
('V', 'Q'): '96',
('V', 'R'): '96',
('V', 'S'): '124',
('V', 'T'): '69',
('V', 'V'): '0',
('V', 'W'): '88',
('V', 'Y'): '55',
('W', 'A'): '148',
('W', 'C'): '215',
('W', 'D'): '181',
('W', 'E'): '152',
('W', 'F'): '40',
('W', 'G'): '184',
('W', 'H'): '115',
('W', 'I'): '61',
('W', 'K'): '110',
('W', 'L'): '61',
('W', 'M'): '67',
('W', 'N'): '174',
('W', 'P'): '147',
('W', 'Q'): '130',
('W', 'R'): '101',
('W', 'S'): '177',
('W', 'T'): '128',
('W', 'V'): '88',
('W', 'W'): '0',
('W', 'Y'): '37',
('Y', 'A'): '112',
('Y', 'C'): '194',
('Y', 'D'): '160',
('Y', 'E'): '122',
('Y', 'F'): '22',
('Y', 'G'): '147',
('Y', 'H'): '83',
('Y', 'I'): '33',
('Y', 'K'): '85',
('Y', 'L'): '36',
('Y', 'M'): '36',
('Y', 'N'): '143',
('Y', 'P'): '110',
('Y', 'Q'): '99',
('Y', 'R'): '77',
('Y', 'S'): '144',
('Y', 'T'): '92',
('Y', 'V'): '55',
('Y', 'W'): '37',
('Y', 'Y'): '0'
}
@samesense
Copy link

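The first row of grantham.matrix.txt should start with .{tab}A instead of .{space}A; a tab-only split would otherwise read ". A" as a single column and shift every amino-acid label by one.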

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment