Skip to content

Instantly share code, notes, and snippets.

@stuppie
Last active August 25, 2016 23:44
Show Gist options
  • Save stuppie/8f90bc5a661c123f17fc29ad75af1e23 to your computer and use it in GitHub Desktop.
Save stuppie/8f90bc5a661c123f17fc29ad75af1e23 to your computer and use it in GitHub Desktop.
Genes that have more than one protein
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Genes that have more than one protein"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download uniprot human from:\n",
"http://www.uniprot.org/uniprot/?query=reviewed:yes%20taxonomy:9606"
]
},
{
"cell_type": "code",
"execution_count": 209,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Entry</th>\n",
" <th>Entry name</th>\n",
" <th>Status</th>\n",
" <th>Protein names</th>\n",
" <th>Gene names</th>\n",
" <th>Organism</th>\n",
" <th>Length</th>\n",
" <th>Protein families</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>P31946</td>\n",
" <td>1433B_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>14-3-3 protein beta/alpha (Protein 1054) (Prot...</td>\n",
" <td>YWHAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>246</td>\n",
" <td>14-3-3 family</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P04439</td>\n",
" <td>1A03_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-3 al...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P01889</td>\n",
" <td>1B07_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-7 al...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P30464</td>\n",
" <td>1B15_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-15 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P30685</td>\n",
" <td>1B35_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-35 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Entry Entry name Status \\\n",
"0 P31946 1433B_HUMAN reviewed \n",
"1 P04439 1A03_HUMAN reviewed \n",
"2 P01889 1B07_HUMAN reviewed \n",
"3 P30464 1B15_HUMAN reviewed \n",
"4 P30685 1B35_HUMAN reviewed \n",
"\n",
" Protein names Gene names \\\n",
"0 14-3-3 protein beta/alpha (Protein 1054) (Prot... YWHAB \n",
"1 HLA class I histocompatibility antigen, A-3 al... HLA-A HLAA \n",
"2 HLA class I histocompatibility antigen, B-7 al... HLA-B HLAB \n",
"3 HLA class I histocompatibility antigen, B-15 a... HLA-B HLAB \n",
"4 HLA class I histocompatibility antigen, B-35 a... HLA-B HLAB \n",
"\n",
" Organism Length Protein families \n",
"0 Homo sapiens (Human) 246 14-3-3 family \n",
"1 Homo sapiens (Human) 365 MHC class I family \n",
"2 Homo sapiens (Human) 362 MHC class I family \n",
"3 Homo sapiens (Human) 362 MHC class I family \n",
"4 Homo sapiens (Human) 362 MHC class I family "
]
},
"execution_count": 209,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from pprint import pprint\n",
"import pandas as pd\n",
"df = pd.read_csv(\"uniprot-reviewed%3Ayes+taxonomy%3A9606.tab.gz\", sep=\"\\t\")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 210,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Entry</th>\n",
" <th>Entry name</th>\n",
" <th>Status</th>\n",
" <th>Protein names</th>\n",
" <th>Gene names</th>\n",
" <th>Organism</th>\n",
" <th>Length</th>\n",
" <th>Protein families</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7104</th>\n",
" <td>P0C0S8</td>\n",
" <td>H2A1_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Histone H2A type 1 (H2A.1) (Histone H2A/p)</td>\n",
" <td>HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2...</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>130</td>\n",
" <td>Histone H2A family</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Entry Entry name Status \\\n",
"7104 P0C0S8 H2A1_HUMAN reviewed \n",
"\n",
" Protein names \\\n",
"7104 Histone H2A type 1 (H2A.1) (Histone H2A/p) \n",
"\n",
" Gene names Organism \\\n",
"7104 HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2... Homo sapiens (Human) \n",
"\n",
" Length Protein families \n",
"7104 130 Histone H2A family "
]
},
"execution_count": 210,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query(\"Entry == 'P0C0S8'\")"
]
},
{
"cell_type": "code",
"execution_count": 213,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Entry</th>\n",
" <th>Entry name</th>\n",
" <th>Status</th>\n",
" <th>Protein names</th>\n",
" <th>Gene names</th>\n",
" <th>Organism</th>\n",
" <th>Length</th>\n",
" <th>Protein families</th>\n",
" <th>genename1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7104</th>\n",
" <td>P0C0S8</td>\n",
" <td>H2A1_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Histone H2A type 1 (H2A.1) (Histone H2A/p)</td>\n",
" <td>HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2...</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>130</td>\n",
" <td>Histone H2A family</td>\n",
" <td>HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Entry Entry name Status \\\n",
"7104 P0C0S8 H2A1_HUMAN reviewed \n",
"\n",
" Protein names \\\n",
"7104 Histone H2A type 1 (H2A.1) (Histone H2A/p) \n",
"\n",
" Gene names Organism \\\n",
"7104 HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2... Homo sapiens (Human) \n",
"\n",
" Length Protein families \\\n",
"7104 130 Histone H2A family \n",
"\n",
" genename1 \n",
"7104 HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM "
]
},
"execution_count": 213,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['genename1'] = df['Gene names'].map(lambda x:' '.join([y.strip().split(' ')[0] for y in x.split(\";\")]) if isinstance(x,str) else x)\n",
"df.query(\"Entry == 'P0C0S8'\")"
]
},
{
"cell_type": "code",
"execution_count": 214,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# split proteins with multiple genes up into multiple rows\n",
"s = df['genename1'].str.split(' ').apply(pd.Series, 1).stack()\n",
"s.index = s.index.droplevel(-1)\n",
"s.name = 'genename'\n",
"df = df.join(s)"
]
},
{
"cell_type": "code",
"execution_count": 223,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Entry</th>\n",
" <th>Entry name</th>\n",
" <th>Status</th>\n",
" <th>Protein names</th>\n",
" <th>Gene names</th>\n",
" <th>Organism</th>\n",
" <th>Length</th>\n",
" <th>Protein families</th>\n",
" <th>genename1</th>\n",
" <th>genename</th>\n",
" <th>genename_first</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7104</th>\n",
" <td>P0C0S8</td>\n",
" <td>H2A1_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Histone H2A type 1 (H2A.1) (Histone H2A/p)</td>\n",
" <td>HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2...</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>130</td>\n",
" <td>Histone H2A family</td>\n",
" <td>HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM</td>\n",
" <td>HIST1H2AG</td>\n",
" <td>HIST1H2AG</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7104</th>\n",
" <td>P0C0S8</td>\n",
" <td>H2A1_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Histone H2A type 1 (H2A.1) (Histone H2A/p)</td>\n",
" <td>HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2...</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>130</td>\n",
" <td>Histone H2A family</td>\n",
" <td>HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM</td>\n",
" <td>HIST1H2AI</td>\n",
" <td>HIST1H2AG</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7104</th>\n",
" <td>P0C0S8</td>\n",
" <td>H2A1_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Histone H2A type 1 (H2A.1) (Histone H2A/p)</td>\n",
" <td>HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2...</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>130</td>\n",
" <td>Histone H2A family</td>\n",
" <td>HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM</td>\n",
" <td>HIST1H2AK</td>\n",
" <td>HIST1H2AG</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7104</th>\n",
" <td>P0C0S8</td>\n",
" <td>H2A1_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Histone H2A type 1 (H2A.1) (Histone H2A/p)</td>\n",
" <td>HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2...</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>130</td>\n",
" <td>Histone H2A family</td>\n",
" <td>HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM</td>\n",
" <td>HIST1H2AL</td>\n",
" <td>HIST1H2AG</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7104</th>\n",
" <td>P0C0S8</td>\n",
" <td>H2A1_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Histone H2A type 1 (H2A.1) (Histone H2A/p)</td>\n",
" <td>HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2...</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>130</td>\n",
" <td>Histone H2A family</td>\n",
" <td>HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM</td>\n",
" <td>HIST1H2AM</td>\n",
" <td>HIST1H2AG</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Entry Entry name Status \\\n",
"7104 P0C0S8 H2A1_HUMAN reviewed \n",
"7104 P0C0S8 H2A1_HUMAN reviewed \n",
"7104 P0C0S8 H2A1_HUMAN reviewed \n",
"7104 P0C0S8 H2A1_HUMAN reviewed \n",
"7104 P0C0S8 H2A1_HUMAN reviewed \n",
"\n",
" Protein names \\\n",
"7104 Histone H2A type 1 (H2A.1) (Histone H2A/p) \n",
"7104 Histone H2A type 1 (H2A.1) (Histone H2A/p) \n",
"7104 Histone H2A type 1 (H2A.1) (Histone H2A/p) \n",
"7104 Histone H2A type 1 (H2A.1) (Histone H2A/p) \n",
"7104 Histone H2A type 1 (H2A.1) (Histone H2A/p) \n",
"\n",
" Gene names Organism \\\n",
"7104 HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2... Homo sapiens (Human) \n",
"7104 HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2... Homo sapiens (Human) \n",
"7104 HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2... Homo sapiens (Human) \n",
"7104 HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2... Homo sapiens (Human) \n",
"7104 HIST1H2AG H2AFP; HIST1H2AI H2AFC; HIST1H2AK H2... Homo sapiens (Human) \n",
"\n",
" Length Protein families \\\n",
"7104 130 Histone H2A family \n",
"7104 130 Histone H2A family \n",
"7104 130 Histone H2A family \n",
"7104 130 Histone H2A family \n",
"7104 130 Histone H2A family \n",
"\n",
" genename1 genename \\\n",
"7104 HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM HIST1H2AG \n",
"7104 HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM HIST1H2AI \n",
"7104 HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM HIST1H2AK \n",
"7104 HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM HIST1H2AL \n",
"7104 HIST1H2AG HIST1H2AI HIST1H2AK HIST1H2AL HIST1H2AM HIST1H2AM \n",
"\n",
" genename_first \n",
"7104 HIST1H2AG \n",
"7104 HIST1H2AG \n",
"7104 HIST1H2AG \n",
"7104 HIST1H2AG \n",
"7104 HIST1H2AG "
]
},
"execution_count": 223,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['genename_first'] = df['genename1'].map(lambda x:x.split(' ')[0] if isinstance(x,str) else x)\n",
"df.query(\"Entry == 'P0C0S8'\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### If you count proteins with more than one gene multiples times:"
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"genename_first\n",
"AKAP7 2\n",
"AMY1A 3\n",
"ARL17A 2\n",
"BBC3 2\n",
"BGLAP 2\n",
"BOLA2 2\n",
"BPY2 3\n",
"C2orf27A 2\n",
"C4B 2\n",
"CALCA 2\n",
"CALM1 3\n",
"CASC2 2\n",
"CCL3L1 2\n",
"CCL4L1 2\n",
"CDKN2A 2\n",
"CDY1 2\n",
"CDY2A 2\n",
"CGB3 3\n",
"CHTF8 2\n",
"CKMT1A 2\n",
"CSAG2 2\n",
"CT47A1 12\n",
"CTAG1A 2\n",
"CTAGE1 2\n",
"CUX1 2\n",
"CXorf49 2\n",
"DEFA1 2\n",
"DEFB103A 2\n",
"DEFB104A 2\n",
"DEFB105A 2\n",
" ..\n",
"PRH1 2\n",
"PRNP 2\n",
"PRY 4\n",
"RAB34 2\n",
"RABGAP1L 2\n",
"RBMY1F 2\n",
"RGPD5 2\n",
"RPL9 4\n",
"RPP14 2\n",
"SERF1A 2\n",
"SIRPB1 2\n",
"SLX1A 2\n",
"SMN1 2\n",
"SPACA5 2\n",
"SPANXA1 2\n",
"SSX2 2\n",
"SSX4 2\n",
"TEX28 3\n",
"TMPO 2\n",
"TOR1AIP2 2\n",
"TOR2A 2\n",
"TP53TG3 6\n",
"TRIM49D1 2\n",
"TSPO 2\n",
"TUBA3C 2\n",
"USP17L24 7\n",
"VCY 2\n",
"XAGE1A 5\n",
"ZFP64 2\n",
"ZNF365 2\n",
"Name: Entry, dtype: int64"
]
},
"execution_count": 226,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db = df.groupby(\"genename_first\").count()[\"Entry\"]\n",
"genes = db[db>1]\n",
"genes"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"137 genes with more than one protein\n",
"441 proteins\n"
]
}
],
"source": [
"print(\"{} genes with more than one protein\".format(genes.count()))\n",
"print(\"{} proteins\".format(genes.sum()))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### If you count proteins with more than one gene once"
]
},
{
"cell_type": "code",
"execution_count": 229,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"genename\n",
"AKAP7 2\n",
"BBC3 2\n",
"BGLAP 2\n",
"CALCA 2\n",
"CASC2 2\n",
"CDKN2A 2\n",
"CHTF8 2\n",
"CTAGE1 2\n",
"CUX1 2\n",
"DUSP13 2\n",
"EPM2A 2\n",
"ERVK-10 4\n",
"ERVK-18 3\n",
"ERVK-19 5\n",
"ERVK-21 4\n",
"ERVK-24 4\n",
"ERVK-25 4\n",
"ERVK-5 3\n",
"ERVK-6 5\n",
"ERVK-7 5\n",
"ERVK-8 5\n",
"ERVK-9 5\n",
"EXD3 2\n",
"FAM127A 2\n",
"FAU 2\n",
"GNAS 4\n",
"HERV-K104 2\n",
"HERVK_113 5\n",
"HLA-A 21\n",
"HLA-B 35\n",
"HLA-C 14\n",
"HLA-DRB1 13\n",
"HMSD 2\n",
"KIAA1456 2\n",
"LRTOMT 2\n",
"MECOM 2\n",
"MOCS2 2\n",
"NACA 2\n",
"NRXN1 2\n",
"NRXN2 2\n",
"NRXN3 2\n",
"POLR2M 2\n",
"PRNP 2\n",
"RAB34 2\n",
"RABGAP1L 2\n",
"RPP14 2\n",
"SIRPB1 2\n",
"TMPO 2\n",
"TOR1AIP2 2\n",
"TOR2A 2\n",
"TSPO 2\n",
"ZFP64 2\n",
"ZNF365 2\n",
"Name: Entry, dtype: int64"
]
},
"execution_count": 229,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db = df.groupby(\"genename\").count()[\"Entry\"]\n",
"genes = db[db>1]\n",
"genes"
]
},
{
"cell_type": "code",
"execution_count": 230,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"53 genes with more than one protein\n",
"211 proteins\n"
]
}
],
"source": [
"print(\"{} genes with more than one protein\".format(genes.count()))\n",
"print(\"{} proteins\".format(genes.sum()))"
]
},
{
"cell_type": "code",
"execution_count": 231,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Entry</th>\n",
" <th>Entry name</th>\n",
" <th>Status</th>\n",
" <th>Protein names</th>\n",
" <th>Gene names</th>\n",
" <th>Organism</th>\n",
" <th>Length</th>\n",
" <th>Protein families</th>\n",
" <th>genename1</th>\n",
" <th>genename</th>\n",
" <th>genename_first</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P04439</td>\n",
" <td>1A03_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-3 al...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P01889</td>\n",
" <td>1B07_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-7 al...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P30464</td>\n",
" <td>1B15_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-15 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P30685</td>\n",
" <td>1B35_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-35 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Q95365</td>\n",
" <td>1B38_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-38 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Q04826</td>\n",
" <td>1B40_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-40 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>P30484</td>\n",
" <td>1B46_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-46 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>P30492</td>\n",
" <td>1B54_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-54 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>P30493</td>\n",
" <td>1B55_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-55 a...</td>\n",
" <td>HLA-B HLAB CDABP0067</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>P18465</td>\n",
" <td>1B57_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-57 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Q29836</td>\n",
" <td>1B67_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-67 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Q31610</td>\n",
" <td>1B81_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-81 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>P30499</td>\n",
" <td>1C01_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, Cw-1 a...</td>\n",
" <td>HLA-C HLAC</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>366</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-C</td>\n",
" <td>HLA-C</td>\n",
" <td>HLA-C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Q29865</td>\n",
" <td>1C18_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, Cw-18 ...</td>\n",
" <td>HLA-C HLAC</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>366</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-C</td>\n",
" <td>HLA-C</td>\n",
" <td>HLA-C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>P30443</td>\n",
" <td>1A01_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-1 al...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>P01892</td>\n",
" <td>1A02_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-2 al...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>P13746</td>\n",
" <td>1A11_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-11 a...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>P30447</td>\n",
" <td>1A23_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-23 a...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>P18462</td>\n",
" <td>1A25_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-25 a...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>P16190</td>\n",
" <td>1A33_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-33 a...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>P30455</td>\n",
" <td>1A36_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-36 a...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>P30457</td>\n",
" <td>1A66_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-66 a...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>P01891</td>\n",
" <td>1A68_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-68 a...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>P30460</td>\n",
" <td>1B08_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-8 al...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>P30480</td>\n",
" <td>1B42_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-42 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>P30483</td>\n",
" <td>1B45_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-45 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Q29940</td>\n",
" <td>1B59_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-59 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>P30498</td>\n",
" <td>1B78_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-78 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>Q9TNN7</td>\n",
" <td>1C05_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, Cw-5 a...</td>\n",
" <td>HLA-C HLAC</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>366</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-C</td>\n",
" <td>HLA-C</td>\n",
" <td>HLA-C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>Q29963</td>\n",
" <td>1C06_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, Cw-6 a...</td>\n",
" <td>HLA-C HLAC</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>366</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-C</td>\n",
" <td>HLA-C</td>\n",
" <td>HLA-C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14353</th>\n",
" <td>P61572</td>\n",
" <td>REC19_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 19 Rec pr...</td>\n",
" <td>ERVK-19</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>105</td>\n",
" <td>NaN</td>\n",
" <td>ERVK-19</td>\n",
" <td>ERVK-19</td>\n",
" <td>ERVK-19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14354</th>\n",
" <td>P61573</td>\n",
" <td>REC9_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 9 Rec pro...</td>\n",
" <td>ERVK-9</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>105</td>\n",
" <td>NaN</td>\n",
" <td>ERVK-9</td>\n",
" <td>ERVK-9</td>\n",
" <td>ERVK-9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14415</th>\n",
" <td>P61576</td>\n",
" <td>REC04_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 104 Rec p...</td>\n",
" <td>HERV-K104</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>105</td>\n",
" <td>NaN</td>\n",
" <td>HERV-K104</td>\n",
" <td>HERV-K104</td>\n",
" <td>HERV-K104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14843</th>\n",
" <td>O95059</td>\n",
" <td>RPP14_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Ribonuclease P protein subunit p14 (EC 3.1.26.5)</td>\n",
" <td>RPP14</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>124</td>\n",
" <td>Eukaryotic/archaeal RNase P protein component ...</td>\n",
" <td>RPP14</td>\n",
" <td>RPP14</td>\n",
" <td>RPP14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14963</th>\n",
" <td>P62861</td>\n",
" <td>RS30_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>40S ribosomal protein S30</td>\n",
" <td>FAU</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>59</td>\n",
" <td>Ribosomal protein S30e family</td>\n",
" <td>FAU</td>\n",
" <td>FAU</td>\n",
" <td>FAU</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15830</th>\n",
" <td>O00241</td>\n",
" <td>SIRB1_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Signal-regulatory protein beta-1 (SIRP-beta-1)...</td>\n",
" <td>SIRPB1</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>398</td>\n",
" <td>NaN</td>\n",
" <td>SIRPB1</td>\n",
" <td>SIRPB1</td>\n",
" <td>SIRPB1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16060</th>\n",
" <td>Q5TFQ8</td>\n",
" <td>SIRBL_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Signal-regulatory protein beta-1 isoform 3 (SI...</td>\n",
" <td>SIRPB1</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>398</td>\n",
" <td>NaN</td>\n",
" <td>SIRPB1</td>\n",
" <td>SIRPB1</td>\n",
" <td>SIRPB1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17126</th>\n",
" <td>Q70YC4</td>\n",
" <td>TALAN_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Talanin</td>\n",
" <td>ZNF365 KIAA0844</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>216</td>\n",
" <td>NaN</td>\n",
" <td>ZNF365</td>\n",
" <td>ZNF365</td>\n",
" <td>ZNF365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17709</th>\n",
" <td>Q8WZ04</td>\n",
" <td>TOMT_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Transmembrane O-methyltransferase (EC 2.1.1.6)...</td>\n",
" <td>LRTOMT COMT2 TOMT PP7517</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>291</td>\n",
" <td>Class I-like SAM-binding methyltransferase sup...</td>\n",
" <td>LRTOMT</td>\n",
" <td>LRTOMT</td>\n",
" <td>LRTOMT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17711</th>\n",
" <td>Q8N2E6</td>\n",
" <td>TOR2X_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Prosalusin (Torsin family 2 member A) (Torsin-...</td>\n",
" <td>TOR2A HEMBA1005096 PSEC0218</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>242</td>\n",
" <td>ClpA/ClpB family, Torsin subfamily</td>\n",
" <td>TOR2A</td>\n",
" <td>TOR2A</td>\n",
" <td>TOR2A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17864</th>\n",
" <td>Q8NFQ8</td>\n",
" <td>TOIP2_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Torsin-1A-interacting protein 2 (Lumenal domai...</td>\n",
" <td>TOR1AIP2 IFRG15 LULL1</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>470</td>\n",
" <td>TOR1AIP family</td>\n",
" <td>TOR1AIP2</td>\n",
" <td>TOR1AIP2</td>\n",
" <td>TOR1AIP2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18056</th>\n",
" <td>Q5JU69</td>\n",
" <td>TOR2A_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Torsin-2A (Torsin family 2 member A) (Torsin-r...</td>\n",
" <td>TOR2A TORP1 UNQ6408/PRO21181</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>321</td>\n",
" <td>ClpA/ClpB family, Torsin subfamily</td>\n",
" <td>TOR2A</td>\n",
" <td>TOR2A</td>\n",
" <td>TOR2A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18168</th>\n",
" <td>B1AH88</td>\n",
" <td>TSPOB_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Putative peripheral benzodiazepine receptor-re...</td>\n",
" <td>TSPO PBRS</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>102</td>\n",
" <td>NaN</td>\n",
" <td>TSPO</td>\n",
" <td>TSPO</td>\n",
" <td>TSPO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18208</th>\n",
" <td>P30536</td>\n",
" <td>TSPO_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Translocator protein (Mitochondrial benzodiaze...</td>\n",
" <td>TSPO BZRP MBR</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>169</td>\n",
" <td>TspO/BZRP family</td>\n",
" <td>TSPO</td>\n",
" <td>TSPO</td>\n",
" <td>TSPO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18446</th>\n",
" <td>P35544</td>\n",
" <td>UBIM_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Ubiquitin-like protein FUBI</td>\n",
" <td>FAU</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>74</td>\n",
" <td>Ubiquitin family</td>\n",
" <td>FAU</td>\n",
" <td>FAU</td>\n",
" <td>FAU</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18805</th>\n",
" <td>P63124</td>\n",
" <td>VPK04_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 104 Pro p...</td>\n",
" <td>HERV-K104</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>HERV-K104</td>\n",
" <td>HERV-K104</td>\n",
" <td>HERV-K104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18806</th>\n",
" <td>P63123</td>\n",
" <td>VPK18_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 18 Pro pr...</td>\n",
" <td>ERVK-18</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-18</td>\n",
" <td>ERVK-18</td>\n",
" <td>ERVK-18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18807</th>\n",
" <td>P63125</td>\n",
" <td>VPK25_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 25 Pro pr...</td>\n",
" <td>ERVK-25</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-25</td>\n",
" <td>ERVK-25</td>\n",
" <td>ERVK-25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18808</th>\n",
" <td>P63122</td>\n",
" <td>VPK8_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 8 Pro pro...</td>\n",
" <td>ERVK-8</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-8</td>\n",
" <td>ERVK-8</td>\n",
" <td>ERVK-8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18921</th>\n",
" <td>P10265</td>\n",
" <td>VPK10_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 10 Pro pr...</td>\n",
" <td>ERVK-10</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-10</td>\n",
" <td>ERVK-10</td>\n",
" <td>ERVK-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18967</th>\n",
" <td>P63121</td>\n",
" <td>VP113_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 113 Pro p...</td>\n",
" <td>HERVK_113</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>HERVK_113</td>\n",
" <td>HERVK_113</td>\n",
" <td>HERVK_113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18970</th>\n",
" <td>P63131</td>\n",
" <td>VPK7_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 7 Pro pro...</td>\n",
" <td>ERVK-7</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-7</td>\n",
" <td>ERVK-7</td>\n",
" <td>ERVK-7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19029</th>\n",
" <td>P63119</td>\n",
" <td>VPK21_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 21 Pro pr...</td>\n",
" <td>ERVK-21</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-21</td>\n",
" <td>ERVK-21</td>\n",
" <td>ERVK-21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19030</th>\n",
" <td>P63127</td>\n",
" <td>VPK9_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 9 Pro pro...</td>\n",
" <td>ERVK-9</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-9</td>\n",
" <td>ERVK-9</td>\n",
" <td>ERVK-9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19094</th>\n",
" <td>P63120</td>\n",
" <td>VPK19_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 19 Pro pr...</td>\n",
" <td>ERVK-19</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-19</td>\n",
" <td>ERVK-19</td>\n",
" <td>ERVK-19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19095</th>\n",
" <td>P63129</td>\n",
" <td>VPK24_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 24 Pro pr...</td>\n",
" <td>ERVK-24</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-24</td>\n",
" <td>ERVK-24</td>\n",
" <td>ERVK-24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19096</th>\n",
" <td>Q9Y6I0</td>\n",
" <td>VPK6_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Endogenous retrovirus group K member 6 Pro pro...</td>\n",
" <td>ERVK-6 ERVK6</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>156</td>\n",
" <td>Peptidase A2 family, HERV class-II K(HML-2) su...</td>\n",
" <td>ERVK-6</td>\n",
" <td>ERVK-6</td>\n",
" <td>ERVK-6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19355</th>\n",
" <td>Q9NPA5</td>\n",
" <td>ZF64A_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Zinc finger protein 64 homolog, isoforms 1 and...</td>\n",
" <td>ZFP64 ZNF338</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>681</td>\n",
" <td>Krueppel C2H2-type zinc-finger protein family</td>\n",
" <td>ZFP64</td>\n",
" <td>ZFP64</td>\n",
" <td>ZFP64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19869</th>\n",
" <td>Q9NTW7</td>\n",
" <td>ZF64B_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Zinc finger protein 64 homolog, isoforms 3 and...</td>\n",
" <td>ZFP64 ZNF338</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>645</td>\n",
" <td>Krueppel C2H2-type zinc-finger protein family</td>\n",
" <td>ZFP64</td>\n",
" <td>ZFP64</td>\n",
" <td>ZFP64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20088</th>\n",
" <td>Q70YC5</td>\n",
" <td>ZN365_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>Protein ZNF365 (Protein su48)</td>\n",
" <td>ZNF365 KIAA0844</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>407</td>\n",
" <td>NaN</td>\n",
" <td>ZNF365</td>\n",
" <td>ZNF365</td>\n",
" <td>ZNF365</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>211 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" Entry Entry name Status \\\n",
"1 P04439 1A03_HUMAN reviewed \n",
"2 P01889 1B07_HUMAN reviewed \n",
"3 P30464 1B15_HUMAN reviewed \n",
"4 P30685 1B35_HUMAN reviewed \n",
"5 Q95365 1B38_HUMAN reviewed \n",
"6 Q04826 1B40_HUMAN reviewed \n",
"7 P30484 1B46_HUMAN reviewed \n",
"8 P30492 1B54_HUMAN reviewed \n",
"9 P30493 1B55_HUMAN reviewed \n",
"10 P18465 1B57_HUMAN reviewed \n",
"11 Q29836 1B67_HUMAN reviewed \n",
"12 Q31610 1B81_HUMAN reviewed \n",
"13 P30499 1C01_HUMAN reviewed \n",
"14 Q29865 1C18_HUMAN reviewed \n",
"42 P30443 1A01_HUMAN reviewed \n",
"43 P01892 1A02_HUMAN reviewed \n",
"44 P13746 1A11_HUMAN reviewed \n",
"46 P30447 1A23_HUMAN reviewed \n",
"47 P18462 1A25_HUMAN reviewed \n",
"48 P16190 1A33_HUMAN reviewed \n",
"49 P30455 1A36_HUMAN reviewed \n",
"50 P30457 1A66_HUMAN reviewed \n",
"51 P01891 1A68_HUMAN reviewed \n",
"52 P30460 1B08_HUMAN reviewed \n",
"53 P30480 1B42_HUMAN reviewed \n",
"54 P30483 1B45_HUMAN reviewed \n",
"55 Q29940 1B59_HUMAN reviewed \n",
"56 P30498 1B78_HUMAN reviewed \n",
"57 Q9TNN7 1C05_HUMAN reviewed \n",
"58 Q29963 1C06_HUMAN reviewed \n",
"... ... ... ... \n",
"14353 P61572 REC19_HUMAN reviewed \n",
"14354 P61573 REC9_HUMAN reviewed \n",
"14415 P61576 REC04_HUMAN reviewed \n",
"14843 O95059 RPP14_HUMAN reviewed \n",
"14963 P62861 RS30_HUMAN reviewed \n",
"15830 O00241 SIRB1_HUMAN reviewed \n",
"16060 Q5TFQ8 SIRBL_HUMAN reviewed \n",
"17126 Q70YC4 TALAN_HUMAN reviewed \n",
"17709 Q8WZ04 TOMT_HUMAN reviewed \n",
"17711 Q8N2E6 TOR2X_HUMAN reviewed \n",
"17864 Q8NFQ8 TOIP2_HUMAN reviewed \n",
"18056 Q5JU69 TOR2A_HUMAN reviewed \n",
"18168 B1AH88 TSPOB_HUMAN reviewed \n",
"18208 P30536 TSPO_HUMAN reviewed \n",
"18446 P35544 UBIM_HUMAN reviewed \n",
"18805 P63124 VPK04_HUMAN reviewed \n",
"18806 P63123 VPK18_HUMAN reviewed \n",
"18807 P63125 VPK25_HUMAN reviewed \n",
"18808 P63122 VPK8_HUMAN reviewed \n",
"18921 P10265 VPK10_HUMAN reviewed \n",
"18967 P63121 VP113_HUMAN reviewed \n",
"18970 P63131 VPK7_HUMAN reviewed \n",
"19029 P63119 VPK21_HUMAN reviewed \n",
"19030 P63127 VPK9_HUMAN reviewed \n",
"19094 P63120 VPK19_HUMAN reviewed \n",
"19095 P63129 VPK24_HUMAN reviewed \n",
"19096 Q9Y6I0 VPK6_HUMAN reviewed \n",
"19355 Q9NPA5 ZF64A_HUMAN reviewed \n",
"19869 Q9NTW7 ZF64B_HUMAN reviewed \n",
"20088 Q70YC5 ZN365_HUMAN reviewed \n",
"\n",
" Protein names \\\n",
"1 HLA class I histocompatibility antigen, A-3 al... \n",
"2 HLA class I histocompatibility antigen, B-7 al... \n",
"3 HLA class I histocompatibility antigen, B-15 a... \n",
"4 HLA class I histocompatibility antigen, B-35 a... \n",
"5 HLA class I histocompatibility antigen, B-38 a... \n",
"6 HLA class I histocompatibility antigen, B-40 a... \n",
"7 HLA class I histocompatibility antigen, B-46 a... \n",
"8 HLA class I histocompatibility antigen, B-54 a... \n",
"9 HLA class I histocompatibility antigen, B-55 a... \n",
"10 HLA class I histocompatibility antigen, B-57 a... \n",
"11 HLA class I histocompatibility antigen, B-67 a... \n",
"12 HLA class I histocompatibility antigen, B-81 a... \n",
"13 HLA class I histocompatibility antigen, Cw-1 a... \n",
"14 HLA class I histocompatibility antigen, Cw-18 ... \n",
"42 HLA class I histocompatibility antigen, A-1 al... \n",
"43 HLA class I histocompatibility antigen, A-2 al... \n",
"44 HLA class I histocompatibility antigen, A-11 a... \n",
"46 HLA class I histocompatibility antigen, A-23 a... \n",
"47 HLA class I histocompatibility antigen, A-25 a... \n",
"48 HLA class I histocompatibility antigen, A-33 a... \n",
"49 HLA class I histocompatibility antigen, A-36 a... \n",
"50 HLA class I histocompatibility antigen, A-66 a... \n",
"51 HLA class I histocompatibility antigen, A-68 a... \n",
"52 HLA class I histocompatibility antigen, B-8 al... \n",
"53 HLA class I histocompatibility antigen, B-42 a... \n",
"54 HLA class I histocompatibility antigen, B-45 a... \n",
"55 HLA class I histocompatibility antigen, B-59 a... \n",
"56 HLA class I histocompatibility antigen, B-78 a... \n",
"57 HLA class I histocompatibility antigen, Cw-5 a... \n",
"58 HLA class I histocompatibility antigen, Cw-6 a... \n",
"... ... \n",
"14353 Endogenous retrovirus group K member 19 Rec pr... \n",
"14354 Endogenous retrovirus group K member 9 Rec pro... \n",
"14415 Endogenous retrovirus group K member 104 Rec p... \n",
"14843 Ribonuclease P protein subunit p14 (EC 3.1.26.5) \n",
"14963 40S ribosomal protein S30 \n",
"15830 Signal-regulatory protein beta-1 (SIRP-beta-1)... \n",
"16060 Signal-regulatory protein beta-1 isoform 3 (SI... \n",
"17126 Talanin \n",
"17709 Transmembrane O-methyltransferase (EC 2.1.1.6)... \n",
"17711 Prosalusin (Torsin family 2 member A) (Torsin-... \n",
"17864 Torsin-1A-interacting protein 2 (Lumenal domai... \n",
"18056 Torsin-2A (Torsin family 2 member A) (Torsin-r... \n",
"18168 Putative peripheral benzodiazepine receptor-re... \n",
"18208 Translocator protein (Mitochondrial benzodiaze... \n",
"18446 Ubiquitin-like protein FUBI \n",
"18805 Endogenous retrovirus group K member 104 Pro p... \n",
"18806 Endogenous retrovirus group K member 18 Pro pr... \n",
"18807 Endogenous retrovirus group K member 25 Pro pr... \n",
"18808 Endogenous retrovirus group K member 8 Pro pro... \n",
"18921 Endogenous retrovirus group K member 10 Pro pr... \n",
"18967 Endogenous retrovirus group K member 113 Pro p... \n",
"18970 Endogenous retrovirus group K member 7 Pro pro... \n",
"19029 Endogenous retrovirus group K member 21 Pro pr... \n",
"19030 Endogenous retrovirus group K member 9 Pro pro... \n",
"19094 Endogenous retrovirus group K member 19 Pro pr... \n",
"19095 Endogenous retrovirus group K member 24 Pro pr... \n",
"19096 Endogenous retrovirus group K member 6 Pro pro... \n",
"19355 Zinc finger protein 64 homolog, isoforms 1 and... \n",
"19869 Zinc finger protein 64 homolog, isoforms 3 and... \n",
"20088 Protein ZNF365 (Protein su48) \n",
"\n",
" Gene names Organism Length \\\n",
"1 HLA-A HLAA Homo sapiens (Human) 365 \n",
"2 HLA-B HLAB Homo sapiens (Human) 362 \n",
"3 HLA-B HLAB Homo sapiens (Human) 362 \n",
"4 HLA-B HLAB Homo sapiens (Human) 362 \n",
"5 HLA-B HLAB Homo sapiens (Human) 362 \n",
"6 HLA-B HLAB Homo sapiens (Human) 362 \n",
"7 HLA-B HLAB Homo sapiens (Human) 362 \n",
"8 HLA-B HLAB Homo sapiens (Human) 362 \n",
"9 HLA-B HLAB CDABP0067 Homo sapiens (Human) 362 \n",
"10 HLA-B HLAB Homo sapiens (Human) 362 \n",
"11 HLA-B HLAB Homo sapiens (Human) 362 \n",
"12 HLA-B HLAB Homo sapiens (Human) 362 \n",
"13 HLA-C HLAC Homo sapiens (Human) 366 \n",
"14 HLA-C HLAC Homo sapiens (Human) 366 \n",
"42 HLA-A HLAA Homo sapiens (Human) 365 \n",
"43 HLA-A HLAA Homo sapiens (Human) 365 \n",
"44 HLA-A HLAA Homo sapiens (Human) 365 \n",
"46 HLA-A HLAA Homo sapiens (Human) 365 \n",
"47 HLA-A HLAA Homo sapiens (Human) 365 \n",
"48 HLA-A HLAA Homo sapiens (Human) 365 \n",
"49 HLA-A HLAA Homo sapiens (Human) 365 \n",
"50 HLA-A HLAA Homo sapiens (Human) 365 \n",
"51 HLA-A HLAA Homo sapiens (Human) 365 \n",
"52 HLA-B HLAB Homo sapiens (Human) 362 \n",
"53 HLA-B HLAB Homo sapiens (Human) 362 \n",
"54 HLA-B HLAB Homo sapiens (Human) 362 \n",
"55 HLA-B HLAB Homo sapiens (Human) 362 \n",
"56 HLA-B HLAB Homo sapiens (Human) 362 \n",
"57 HLA-C HLAC Homo sapiens (Human) 366 \n",
"58 HLA-C HLAC Homo sapiens (Human) 366 \n",
"... ... ... ... \n",
"14353 ERVK-19 Homo sapiens (Human) 105 \n",
"14354 ERVK-9 Homo sapiens (Human) 105 \n",
"14415 HERV-K104 Homo sapiens (Human) 105 \n",
"14843 RPP14 Homo sapiens (Human) 124 \n",
"14963 FAU Homo sapiens (Human) 59 \n",
"15830 SIRPB1 Homo sapiens (Human) 398 \n",
"16060 SIRPB1 Homo sapiens (Human) 398 \n",
"17126 ZNF365 KIAA0844 Homo sapiens (Human) 216 \n",
"17709 LRTOMT COMT2 TOMT PP7517 Homo sapiens (Human) 291 \n",
"17711 TOR2A HEMBA1005096 PSEC0218 Homo sapiens (Human) 242 \n",
"17864 TOR1AIP2 IFRG15 LULL1 Homo sapiens (Human) 470 \n",
"18056 TOR2A TORP1 UNQ6408/PRO21181 Homo sapiens (Human) 321 \n",
"18168 TSPO PBRS Homo sapiens (Human) 102 \n",
"18208 TSPO BZRP MBR Homo sapiens (Human) 169 \n",
"18446 FAU Homo sapiens (Human) 74 \n",
"18805 HERV-K104 Homo sapiens (Human) 156 \n",
"18806 ERVK-18 Homo sapiens (Human) 156 \n",
"18807 ERVK-25 Homo sapiens (Human) 156 \n",
"18808 ERVK-8 Homo sapiens (Human) 156 \n",
"18921 ERVK-10 Homo sapiens (Human) 156 \n",
"18967 HERVK_113 Homo sapiens (Human) 156 \n",
"18970 ERVK-7 Homo sapiens (Human) 156 \n",
"19029 ERVK-21 Homo sapiens (Human) 156 \n",
"19030 ERVK-9 Homo sapiens (Human) 156 \n",
"19094 ERVK-19 Homo sapiens (Human) 156 \n",
"19095 ERVK-24 Homo sapiens (Human) 156 \n",
"19096 ERVK-6 ERVK6 Homo sapiens (Human) 156 \n",
"19355 ZFP64 ZNF338 Homo sapiens (Human) 681 \n",
"19869 ZFP64 ZNF338 Homo sapiens (Human) 645 \n",
"20088 ZNF365 KIAA0844 Homo sapiens (Human) 407 \n",
"\n",
" Protein families genename1 \\\n",
"1 MHC class I family HLA-A \n",
"2 MHC class I family HLA-B \n",
"3 MHC class I family HLA-B \n",
"4 MHC class I family HLA-B \n",
"5 MHC class I family HLA-B \n",
"6 MHC class I family HLA-B \n",
"7 MHC class I family HLA-B \n",
"8 MHC class I family HLA-B \n",
"9 MHC class I family HLA-B \n",
"10 MHC class I family HLA-B \n",
"11 MHC class I family HLA-B \n",
"12 MHC class I family HLA-B \n",
"13 MHC class I family HLA-C \n",
"14 MHC class I family HLA-C \n",
"42 MHC class I family HLA-A \n",
"43 MHC class I family HLA-A \n",
"44 MHC class I family HLA-A \n",
"46 MHC class I family HLA-A \n",
"47 MHC class I family HLA-A \n",
"48 MHC class I family HLA-A \n",
"49 MHC class I family HLA-A \n",
"50 MHC class I family HLA-A \n",
"51 MHC class I family HLA-A \n",
"52 MHC class I family HLA-B \n",
"53 MHC class I family HLA-B \n",
"54 MHC class I family HLA-B \n",
"55 MHC class I family HLA-B \n",
"56 MHC class I family HLA-B \n",
"57 MHC class I family HLA-C \n",
"58 MHC class I family HLA-C \n",
"... ... ... \n",
"14353 NaN ERVK-19 \n",
"14354 NaN ERVK-9 \n",
"14415 NaN HERV-K104 \n",
"14843 Eukaryotic/archaeal RNase P protein component ... RPP14 \n",
"14963 Ribosomal protein S30e family FAU \n",
"15830 NaN SIRPB1 \n",
"16060 NaN SIRPB1 \n",
"17126 NaN ZNF365 \n",
"17709 Class I-like SAM-binding methyltransferase sup... LRTOMT \n",
"17711 ClpA/ClpB family, Torsin subfamily TOR2A \n",
"17864 TOR1AIP family TOR1AIP2 \n",
"18056 ClpA/ClpB family, Torsin subfamily TOR2A \n",
"18168 NaN TSPO \n",
"18208 TspO/BZRP family TSPO \n",
"18446 Ubiquitin family FAU \n",
"18805 Peptidase A2 family, HERV class-II K(HML-2) su... HERV-K104 \n",
"18806 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-18 \n",
"18807 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-25 \n",
"18808 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-8 \n",
"18921 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-10 \n",
"18967 Peptidase A2 family, HERV class-II K(HML-2) su... HERVK_113 \n",
"18970 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-7 \n",
"19029 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-21 \n",
"19030 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-9 \n",
"19094 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-19 \n",
"19095 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-24 \n",
"19096 Peptidase A2 family, HERV class-II K(HML-2) su... ERVK-6 \n",
"19355 Krueppel C2H2-type zinc-finger protein family ZFP64 \n",
"19869 Krueppel C2H2-type zinc-finger protein family ZFP64 \n",
"20088 NaN ZNF365 \n",
"\n",
" genename genename_first \n",
"1 HLA-A HLA-A \n",
"2 HLA-B HLA-B \n",
"3 HLA-B HLA-B \n",
"4 HLA-B HLA-B \n",
"5 HLA-B HLA-B \n",
"6 HLA-B HLA-B \n",
"7 HLA-B HLA-B \n",
"8 HLA-B HLA-B \n",
"9 HLA-B HLA-B \n",
"10 HLA-B HLA-B \n",
"11 HLA-B HLA-B \n",
"12 HLA-B HLA-B \n",
"13 HLA-C HLA-C \n",
"14 HLA-C HLA-C \n",
"42 HLA-A HLA-A \n",
"43 HLA-A HLA-A \n",
"44 HLA-A HLA-A \n",
"46 HLA-A HLA-A \n",
"47 HLA-A HLA-A \n",
"48 HLA-A HLA-A \n",
"49 HLA-A HLA-A \n",
"50 HLA-A HLA-A \n",
"51 HLA-A HLA-A \n",
"52 HLA-B HLA-B \n",
"53 HLA-B HLA-B \n",
"54 HLA-B HLA-B \n",
"55 HLA-B HLA-B \n",
"56 HLA-B HLA-B \n",
"57 HLA-C HLA-C \n",
"58 HLA-C HLA-C \n",
"... ... ... \n",
"14353 ERVK-19 ERVK-19 \n",
"14354 ERVK-9 ERVK-9 \n",
"14415 HERV-K104 HERV-K104 \n",
"14843 RPP14 RPP14 \n",
"14963 FAU FAU \n",
"15830 SIRPB1 SIRPB1 \n",
"16060 SIRPB1 SIRPB1 \n",
"17126 ZNF365 ZNF365 \n",
"17709 LRTOMT LRTOMT \n",
"17711 TOR2A TOR2A \n",
"17864 TOR1AIP2 TOR1AIP2 \n",
"18056 TOR2A TOR2A \n",
"18168 TSPO TSPO \n",
"18208 TSPO TSPO \n",
"18446 FAU FAU \n",
"18805 HERV-K104 HERV-K104 \n",
"18806 ERVK-18 ERVK-18 \n",
"18807 ERVK-25 ERVK-25 \n",
"18808 ERVK-8 ERVK-8 \n",
"18921 ERVK-10 ERVK-10 \n",
"18967 HERVK_113 HERVK_113 \n",
"18970 ERVK-7 ERVK-7 \n",
"19029 ERVK-21 ERVK-21 \n",
"19030 ERVK-9 ERVK-9 \n",
"19094 ERVK-19 ERVK-19 \n",
"19095 ERVK-24 ERVK-24 \n",
"19096 ERVK-6 ERVK-6 \n",
"19355 ZFP64 ZFP64 \n",
"19869 ZFP64 ZFP64 \n",
"20088 ZNF365 ZNF365 \n",
"\n",
"[211 rows x 11 columns]"
]
},
"execution_count": 231,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the rows whose 'genename' appears in genes.index.\n",
"# Take an explicit copy so that later column assignments modify an independent frame\n",
"# rather than a slice of df (which makes pandas emit SettingWithCopyWarning).\n",
"dupes = df[df['genename'].isin(genes.index)].copy()\n",
"dupes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get metadata about these proteins"
]
},
{
"cell_type": "code",
"execution_count": 233,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading cached metadata from cache/metadata.0.txt\n"
]
}
],
"source": [
"import uniprot\n",
"\n",
"# Request metadata for every UniProt entry listed in dupes. 'cache' is passed as the\n",
"# second argument; the recorded output shows the data being loaded from cache/metadata.0.txt.\n",
"entry_ids = list(dupes.Entry)\n",
"uniprot_data = uniprot.batch_uniprot_metadata(entry_ids, 'cache')"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Q31612',\n",
" {'GO:0000139',\n",
" 'GO:0002474',\n",
" 'GO:0002479',\n",
" 'GO:0002480',\n",
" 'GO:0003823',\n",
" 'GO:0005783',\n",
" 'GO:0005794',\n",
" 'GO:0005886',\n",
" 'GO:0009986',\n",
" 'GO:0012507',\n",
" 'GO:0016032',\n",
" 'GO:0019882',\n",
" 'GO:0030670',\n",
" 'GO:0031901',\n",
" 'GO:0042605',\n",
" 'GO:0042612',\n",
" 'GO:0050776',\n",
" 'GO:0060333',\n",
" 'GO:0060337',\n",
" 'GO:0071556'})\n"
]
}
],
"source": [
"# Map each entry to the set of its GO identifiers: the text before the first ';' of\n",
"# every item under the record's 'go' key (records without that key get an empty set).\n",
"go = {\n",
"    entry: {annotation.split(\";\")[0] for annotation in record.get('go', [])}\n",
"    for entry, record in uniprot_data.items()\n",
"}\n",
"pprint(list(go.items())[10])"
]
},
{
"cell_type": "code",
"execution_count": 235,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py:288: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[key] = _infer_fill_value(value)\n",
"/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py:465: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[item] = s\n"
]
}
],
"source": [
"# Attach each entry's GO-term set as a 'go' column. assign() returns a new frame, so nothing\n",
"# is written into a slice of df and pandas has no reason to emit SettingWithCopyWarning.\n",
"# The explicit go[entry] lookup still raises KeyError if an entry has no metadata.\n",
"dupes = dupes.assign(go=dupes.Entry.map(lambda entry: go[entry]))"
]
},
{
"cell_type": "code",
"execution_count": 236,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Entry</th>\n",
" <th>Entry name</th>\n",
" <th>Status</th>\n",
" <th>Protein names</th>\n",
" <th>Gene names</th>\n",
" <th>Organism</th>\n",
" <th>Length</th>\n",
" <th>Protein families</th>\n",
" <th>genename1</th>\n",
" <th>genename</th>\n",
" <th>genename_first</th>\n",
" <th>go</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P04439</td>\n",
" <td>1A03_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, A-3 al...</td>\n",
" <td>HLA-A HLAA</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>365</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>HLA-A</td>\n",
" <td>{GO:0009986, GO:0031901, GO:0046977, GO:001250...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P01889</td>\n",
" <td>1B07_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-7 al...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>{GO:0009986, GO:0032675, GO:0042270, GO:200119...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P30464</td>\n",
" <td>1B15_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-15 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>{GO:0060337, GO:0005794, GO:0005783, GO:000998...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P30685</td>\n",
" <td>1B35_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-35 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>{GO:0060337, GO:0005794, GO:0005783, GO:000998...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Q95365</td>\n",
" <td>1B38_HUMAN</td>\n",
" <td>reviewed</td>\n",
" <td>HLA class I histocompatibility antigen, B-38 a...</td>\n",
" <td>HLA-B HLAB</td>\n",
" <td>Homo sapiens (Human)</td>\n",
" <td>362</td>\n",
" <td>MHC class I family</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>HLA-B</td>\n",
" <td>{GO:0060337, GO:0005794, GO:0005783, GO:000998...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Entry Entry name Status \\\n",
"1 P04439 1A03_HUMAN reviewed \n",
"2 P01889 1B07_HUMAN reviewed \n",
"3 P30464 1B15_HUMAN reviewed \n",
"4 P30685 1B35_HUMAN reviewed \n",
"5 Q95365 1B38_HUMAN reviewed \n",
"\n",
" Protein names Gene names \\\n",
"1 HLA class I histocompatibility antigen, A-3 al... HLA-A HLAA \n",
"2 HLA class I histocompatibility antigen, B-7 al... HLA-B HLAB \n",
"3 HLA class I histocompatibility antigen, B-15 a... HLA-B HLAB \n",
"4 HLA class I histocompatibility antigen, B-35 a... HLA-B HLAB \n",
"5 HLA class I histocompatibility antigen, B-38 a... HLA-B HLAB \n",
"\n",
" Organism Length Protein families genename1 genename \\\n",
"1 Homo sapiens (Human) 365 MHC class I family HLA-A HLA-A \n",
"2 Homo sapiens (Human) 362 MHC class I family HLA-B HLA-B \n",
"3 Homo sapiens (Human) 362 MHC class I family HLA-B HLA-B \n",
"4 Homo sapiens (Human) 362 MHC class I family HLA-B HLA-B \n",
"5 Homo sapiens (Human) 362 MHC class I family HLA-B HLA-B \n",
"\n",
" genename_first go \n",
"1 HLA-A {GO:0009986, GO:0031901, GO:0046977, GO:001250... \n",
"2 HLA-B {GO:0009986, GO:0032675, GO:0042270, GO:200119... \n",
"3 HLA-B {GO:0060337, GO:0005794, GO:0005783, GO:000998... \n",
"4 HLA-B {GO:0060337, GO:0005794, GO:0005783, GO:000998... \n",
"5 HLA-B {GO:0060337, GO:0005794, GO:0005783, GO:000998... "
]
},
"execution_count": 236,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows, including the 'go' column.\n",
"dupes.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 237,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"genename1\n",
"AKAP7 2\n",
"BBC3 2\n",
"BGLAP 2\n",
"CALCA 2\n",
"CDKN2A 2\n",
"CHTF8 2\n",
"CUX1 2\n",
"DUSP13 2\n",
"EPM2A 2\n",
"ERVK-10 4\n",
"ERVK-18 3\n",
"ERVK-19 5\n",
"ERVK-21 4\n",
"ERVK-24 4\n",
"ERVK-25 4\n",
"ERVK-5 3\n",
"ERVK-6 5\n",
"ERVK-7 5\n",
"ERVK-8 5\n",
"ERVK-9 5\n",
"EXD3 2\n",
"FAM127A 2\n",
"FAU 2\n",
"GNAS 4\n",
"HERV-K104 2\n",
"HERVK_113 5\n",
"HLA-A 13\n",
"HLA-B 13\n",
"HLA-C 10\n",
"HLA-DRB1 8\n",
"HMSD 2\n",
"KIAA1456 2\n",
"LRTOMT 2\n",
"MECOM 2\n",
"MOCS2 2\n",
"NACA 2\n",
"NRXN1 2\n",
"NRXN2 2\n",
"NRXN3 2\n",
"POLR2M 2\n",
"PRNP 2\n",
"RAB34 2\n",
"RABGAP1L 2\n",
"RPP14 2\n",
"SIRPB1 2\n",
"TMPO 2\n",
"TOR1AIP2 2\n",
"TOR2A 2\n",
"TSPO 2\n",
"ZNF365 2\n",
"Name: go, dtype: int64"
]
},
"execution_count": 237,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# For every gene, count how many distinct GO-term sets occur among its proteins\n",
"# (sets are frozen so they can be de-duplicated), then keep genes with more than one.\n",
"go_set_counts = dupes.groupby(\"genename1\")['go'].agg(\n",
"    lambda go_sets: len({frozenset(terms) for terms in go_sets})\n",
")\n",
"dupes_go = go_set_counts[go_set_counts > 1]\n",
"dupes_go"
]
},
{
"cell_type": "code",
"execution_count": 238,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"50"
]
},
"execution_count": 238,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of genes left in dupes_go.\n",
"dupes_go.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment