Skip to content

Instantly share code, notes, and snippets.

@davidwhogg
Last active June 23, 2022 00:23
Show Gist options
  • Save davidwhogg/aa850fbcf69f435699752ba786a980d8 to your computer and use it in GitHub Desktop.
Save davidwhogg/aa850fbcf69f435699752ba786a980d8 to your computer and use it in GitHub Desktop.
A notebook for the Gaia Hike
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "06320673",
"metadata": {},
"source": [
"# XP coefficients and APOGEE data\n",
"\n",
"## Authors:\n",
"- **Adrian Price-Whelan** (Flatiron)\n",
"- **David W. Hogg** (NYU) (Flatiron)"
]
},
{
"cell_type": "markdown",
"id": "60f627c4",
"metadata": {},
"source": [
"## Read in APOGEE data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa26193d",
"metadata": {
"ExecuteTime": {
"end_time": "2022-06-15T01:40:27.141892Z",
"start_time": "2022-06-15T01:40:27.135575Z"
}
},
"outputs": [],
"source": [
"import pathlib\n",
"\n",
"from astropy.convolution import convolve, Gaussian2DKernel\n",
"import astropy.coordinates as coord\n",
"from astropy.stats import median_absolute_deviation as MAD\n",
"import astropy.table as at\n",
"import astropy.units as u\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import numpy as np\n",
"from scipy.stats import binned_statistic, binned_statistic_2d\n",
"\n",
"from pyia import GaiaData\n",
"import h5py\n",
"\n",
"from tqdm import tqdm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2eb39275",
"metadata": {
"ExecuteTime": {
"end_time": "2022-06-15T01:22:16.673443Z",
"start_time": "2022-06-15T01:22:07.797965Z"
}
},
"outputs": [],
"source": [
"datadir = \"./\"\n",
"xm = at.Table.read(datadir + 'allStar-dr17-synspec-gaiadr3.fits')\n",
"xm2 = at.Table.read(datadir + 'allStar-dr17-synspec-gaiadr3-gaiasourcelite.fits')\n",
"xm2.rename_column('source_id', 'GAIADR3_SOURCE_ID')\n",
"allstar = at.Table.read(datadir + 'allStarLite-dr17-synspec_rev1.fits')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "37f1f66a",
"metadata": {
"ExecuteTime": {
"end_time": "2022-06-15T17:47:13.865546Z",
"start_time": "2022-06-15T17:46:54.953972Z"
}
},
"outputs": [],
"source": [
"tbl = at.unique(at.hstack((allstar, xm)), keys='APOGEE_ID')\n",
"tbl = tbl[tbl['GAIADR3_SOURCE_ID'] != 0]\n",
"tbl = at.join(tbl, xm2, keys='GAIADR3_SOURCE_ID')\n",
"len(tbl)"
]
},
{
"cell_type": "markdown",
"id": "0513a3c2",
"metadata": {},
"source": [
"## Read in the BP/RP Coefficients"
]
},
{
"cell_type": "markdown",
"id": "07ef4639",
"metadata": {},
"source": [
"See: `DR3-xp-subset.ipynb` for getting the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79306271",
"metadata": {
"ExecuteTime": {
"end_time": "2022-06-15T17:47:15.631613Z",
"start_time": "2022-06-15T17:47:15.629159Z"
}
},
"outputs": [],
"source": [
"apogee_xp_cont_filename = pathlib.Path(datadir + 'apogee-dr17-xpcontinuous.hdf5')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "02677824",
"metadata": {
"ExecuteTime": {
"end_time": "2022-06-15T17:47:23.349365Z",
"start_time": "2022-06-15T17:47:16.202584Z"
}
},
"outputs": [],
"source": [
"# Read data and lightly rearrange\n",
"xp_tbl = at.Table()\n",
"with h5py.File(apogee_xp_cont_filename, 'r') as f:\n",
" xp_tbl['GAIADR3_SOURCE_ID'] = f['source_id'][:]\n",
" xp_tbl['bp'] = f['bp_coefficients'][:]\n",
" xp_tbl['rp'] = f['rp_coefficients'][:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4f341ec",
"metadata": {
"ExecuteTime": {
"end_time": "2022-06-15T17:47:29.937161Z",
"start_time": "2022-06-15T17:47:23.350804Z"
}
},
"outputs": [],
"source": [
"# Read data and make simple cuts\n",
"xp_apogee_tbl = at.join(tbl, xp_tbl, keys='GAIADR3_SOURCE_ID')\n",
"xp_apogee_tbl = xp_apogee_tbl[\n",
" (xp_apogee_tbl['TEFF'] > 0) &\n",
" (xp_apogee_tbl['LOGG'] > -0.5) &\n",
" (xp_apogee_tbl['M_H'] > -3.)\n",
"]\n",
"len(xp_apogee_tbl)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2657bb59",
"metadata": {
"ExecuteTime": {
"end_time": "2022-06-15T17:47:41.085389Z",
"start_time": "2022-06-15T17:47:40.921033Z"
}
},
"outputs": [],
"source": [
"# Make XP spectral coefficient ratios.\n",
"_bp = xp_apogee_tbl['bp'][:, 1:] / xp_apogee_tbl['bp'][:, 0:1]\n",
"_rp = xp_apogee_tbl['rp'][:, 1:] / xp_apogee_tbl['rp'][:, 0:1]"
]
},
{
"cell_type": "markdown",
"id": "bcb8fa9d",
"metadata": {},
"source": [
"## Make rectangular features and labels for any kind of plotting or ML...?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e96bb05e",
"metadata": {},
"outputs": [],
"source": [
"# This does something useful!\n",
"xp_apogee_tbl = xp_apogee_tbl.filled()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6589c96e",
"metadata": {
"ExecuteTime": {
"end_time": "2022-06-15T17:48:04.069510Z",
"start_time": "2022-06-15T17:48:03.887065Z"
}
},
"outputs": [],
"source": [
"feature_mask = (\n",
" (xp_apogee_tbl['J'] < 13) &\n",
" (xp_apogee_tbl['H'] < 12) &\n",
" (xp_apogee_tbl['K'] < 11))\n",
"\n",
"features = np.hstack((\n",
" (xp_apogee_tbl['bp'][:, 1:11] / xp_apogee_tbl['bp'][:, 0:1])[feature_mask],\n",
" (xp_apogee_tbl['rp'][:, 1:11] / xp_apogee_tbl['rp'][:, 0:1])[feature_mask],\n",
" (xp_apogee_tbl['phot_bp_mean_mag'] - xp_apogee_tbl['phot_rp_mean_mag'])[feature_mask, None],\n",
" (xp_apogee_tbl['J'] - xp_apogee_tbl['H'])[feature_mask, None],\n",
" (xp_apogee_tbl['H'] - xp_apogee_tbl['K'])[feature_mask, None]\n",
"))\n",
"\n",
"feature_names = np.concatenate((\n",
" [f'BP[{i}]' for i in range(1, 10+1)],\n",
" [f'RP[{i}]' for i in range(1, 10+1)],\n",
" ['BP-RP', 'J-H', 'H-K'],\n",
"))\n",
"\n",
"print(features.shape)\n",
"print(len(feature_names), feature_names)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4482ab5",
"metadata": {},
"outputs": [],
"source": [
"labels = np.vstack((\n",
" xp_apogee_tbl['TEFF'].filled(np.nan),\n",
" xp_apogee_tbl['LOGG'].filled(np.nan),\n",
" xp_apogee_tbl['AK_WISE'].filled(np.nan),\n",
" xp_apogee_tbl['M_H'].filled(np.nan),\n",
" xp_apogee_tbl['ALPHA_M'].filled(np.nan),\n",
" #(xp_apogee_tbl['parallax'] * 10 ** (1/5 * xp_apogee_tbl['phot_g_mean_mag'])).filled(np.nan)\n",
")).T[feature_mask]\n",
"print(labels.shape)\n",
"\n",
"label_names = ['TEFF', 'LOGG', 'AK_WISE', 'M_H', 'ALPHA_M'] # , 'SCHM_G']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f73adfb4",
"metadata": {},
"outputs": [],
"source": [
"# check that the data aren't wack\n",
"plt.scatter(features[:, 10], labels[:, 0], c=\"k\", s=1., alpha=0.05)\n",
"plt.xlabel(feature_names[10])\n",
"plt.ylabel(label_names[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa1d5240",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment