Created
September 26, 2017 20:48
-
-
Save smoh/5bd9072564872fbb132fc30e294f7859 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Issue report for Simbad TAP Service\n", | |
"\n", | |
"HIPPARCOS Identifiers 'HIP XXXX' will only be matched in table `join`s if they are fit to 10-character lengths with whitespaces in between 'HIP' and 'XXXX'." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from astropy.table import Table\n", | |
"import pyvo as vo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"simbadtap = vo.dal.TAPService(\"http://simbad.u-strasbg.fr/simbad/sim-tap\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n", | |
"<!-- Produced with astropy.io.votable version 2.0.2\r\n", | |
" http://www.astropy.org/ -->\r\n", | |
"<VOTABLE version=\"1.3\" xmlns=\"http://www.ivoa.net/xml/VOTable/v1.3\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\"http://www.ivoa.net/xml/VOTable/v1.3\">\r\n", | |
" <RESOURCE type=\"results\">\r\n", | |
" <TABLE>\r\n", | |
" <FIELD ID=\"object\" arraysize=\"15\" datatype=\"unicodeChar\" name=\"object\"/>\r\n", | |
" <DATA>\r\n", | |
" <TABLEDATA>\r\n", | |
" <TR>\r\n", | |
" <TD>HIP 69721</TD>\r\n", | |
" </TR>\r\n", | |
" <TR>\r\n", | |
" <TD>HIP 67005</TD>\r\n", | |
" </TR>\r\n", | |
" <TR>\r\n", | |
" <TD>TYC 3851-600-1</TD>\r\n", | |
" </TR>\r\n", | |
" <TR>\r\n", | |
" <TD>HIP 67231</TD>\r\n", | |
" </TR>\r\n", | |
" <TR>\r\n", | |
" <TD>TYC 3851-336-1</TD>\r\n", | |
" </TR>\r\n", | |
" </TABLEDATA>\r\n", | |
" </DATA>\r\n", | |
" </TABLE>\r\n", | |
" </RESOURCE>\r\n", | |
"</VOTABLE>\r\n" | |
] | |
} | |
], | |
"source": [ | |
"!cat example.xml" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Upload table and use join" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"result = simbadtap.run_sync(\"\"\"\n", | |
"SELECT t.object, ident.oidref\n", | |
"FROM TAP_UPLOAD.mytable as t JOIN ident ON t.object = ident.id\n", | |
"\"\"\", uploads={\"mytable\":'example.xml'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<Table masked=True length=2>\n", | |
"<table id=\"table4741191328\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>object</th><th>oidref</th></tr></thead>\n", | |
"<thead><tr><th>object</th><th>int64</th></tr></thead>\n", | |
"<tr><td>TYC 3851-600-1</td><td>6937777</td></tr>\n", | |
"<tr><td>TYC 3851-336-1</td><td>547197</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table masked=True length=2>\n", | |
" object oidref\n", | |
" object int64 \n", | |
"-------------- -------\n", | |
"TYC 3851-600-1 6937777\n", | |
"TYC 3851-336-1 547197" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result.table" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"All HIP ids with one space between `HIP` and number is **not** matched." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Use `where` statement with one id" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"result = simbadtap.run_sync(\"\"\"\n", | |
"-- Display all identifiers of a given object.\n", | |
"SELECT id2.id\n", | |
"FROM ident AS id1 JOIN ident AS id2 USING(oidref)\n", | |
"WHERE id1.id = 'hip 69721';\n", | |
"\"\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<Table masked=True length=12>\n", | |
"<table id=\"table4741439672\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>id</th></tr></thead>\n", | |
"<thead><tr><th>object</th></tr></thead>\n", | |
"<tr><td>Gaia 1611685687553578752</td></tr>\n", | |
"<tr><td>AG+58 927</td></tr>\n", | |
"<tr><td>BD+59 1570</td></tr>\n", | |
"<tr><td>GSC 03865-00308</td></tr>\n", | |
"<tr><td>HD 125259</td></tr>\n", | |
"<tr><td>HIC 69721</td></tr>\n", | |
"<tr><td>HIP 69721</td></tr>\n", | |
"<tr><td>PPM 34434</td></tr>\n", | |
"<tr><td>SAO 29076</td></tr>\n", | |
"<tr><td>TYC 3865-308-1</td></tr>\n", | |
"<tr><td>YZ 58 7902</td></tr>\n", | |
"<tr><td>2MASS J14161743+5823218</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table masked=True length=12>\n", | |
" id \n", | |
" object \n", | |
"------------------------\n", | |
"Gaia 1611685687553578752\n", | |
" AG+58 927\n", | |
" BD+59 1570\n", | |
" GSC 03865-00308\n", | |
" HD 125259\n", | |
" HIC 69721\n", | |
" HIP 69721\n", | |
" PPM 34434\n", | |
" SAO 29076\n", | |
" TYC 3865-308-1\n", | |
" YZ 58 7902\n", | |
" 2MASS J14161743+5823218" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result.table" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`HIP 69721` **is** matched." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'HIP 69721'" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result.table[6].as_void()[0].decode('utf-8')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The only difference I can see between HIP id in the previous uploaded table and the one retrieved from Simbad is it's filled with whitespace upto 10 characters." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"10" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(result.table[6].as_void()[0].decode('utf-8'))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"So let's try filling whitespace to HIP ids, and see if `join` works this time" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<Table masked=True length=5>\n", | |
"<table id=\"table4741514856\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>object</th></tr></thead>\n", | |
"<thead><tr><th>str14</th></tr></thead>\n", | |
"<tr><td>HIP 69721</td></tr>\n", | |
"<tr><td>HIP 67005</td></tr>\n", | |
"<tr><td>TYC 3851-600-1</td></tr>\n", | |
"<tr><td>HIP 67231</td></tr>\n", | |
"<tr><td>TYC 3851-336-1</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table masked=True length=5>\n", | |
" object \n", | |
" str14 \n", | |
"--------------\n", | |
" HIP 69721\n", | |
" HIP 67005\n", | |
"TYC 3851-600-1\n", | |
" HIP 67231\n", | |
"TYC 3851-336-1" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"t = Table.read(\"example.xml\", format='votable')\n", | |
"t['object'] = ['HIP{:7d}'.format(int(s.split()[1])) if 'HIP' in s else s for s in t['object']]\n", | |
"t" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'HIP 69721'" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"t[0].as_void()[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"10" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(t[0].as_void()[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"t.write(\"example_modified.xml\", format='votable')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"result = simbadtap.run_sync(\"\"\"\n", | |
"SELECT t.object, ident.oidref\n", | |
"FROM TAP_UPLOAD.mytable as t JOIN ident ON t.object = ident.id\n", | |
"\"\"\", uploads={\"mytable\":'example_modified.xml'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<Table masked=True length=5>\n", | |
"<table id=\"table4750213472\" class=\"table-striped table-bordered table-condensed\">\n", | |
"<thead><tr><th>object</th><th>oidref</th></tr></thead>\n", | |
"<thead><tr><th>object</th><th>int64</th></tr></thead>\n", | |
"<tr><td>HIP 69721</td><td>474820</td></tr>\n", | |
"<tr><td>HIP 67005</td><td>547216</td></tr>\n", | |
"<tr><td>TYC 3851-600-1</td><td>6937777</td></tr>\n", | |
"<tr><td>HIP 67231</td><td>529163</td></tr>\n", | |
"<tr><td>TYC 3851-336-1</td><td>547197</td></tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Table masked=True length=5>\n", | |
" object oidref\n", | |
" object int64 \n", | |
"-------------- -------\n", | |
" HIP 69721 474820\n", | |
" HIP 67005 547216\n", | |
"TYC 3851-600-1 6937777\n", | |
" HIP 67231 529163\n", | |
"TYC 3851-336-1 547197" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result.table" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This time, it is matched. (??)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"gist": { | |
"data": { | |
"description": "", | |
"public": true | |
}, | |
"id": "" | |
}, | |
"kernelspec": { | |
"display_name": "Python [research]", | |
"language": "python", | |
"name": "Python [research]" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
}, | |
"toc": { | |
"colors": { | |
"hover_highlight": "#DAA520", | |
"running_highlight": "#FF0000", | |
"selected_highlight": "#FFD700" | |
}, | |
"moveMenuLeft": true, | |
"nav_menu": { | |
"height": "66px", | |
"width": "252px" | |
}, | |
"navigate_menu": true, | |
"number_sections": true, | |
"sideBar": true, | |
"threshold": 4, | |
"toc_cell": false, | |
"toc_section_display": "block", | |
"toc_window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment