Skip to content

Instantly share code, notes, and snippets.

@epassaro
Last active August 22, 2019 18:18
Show Gist options
  • Save epassaro/df20d080368a92ae2fc0930b3fba656d to your computer and use it in GitHub Desktop.
Save epassaro/df20d080368a92ae2fc0930b3fba656d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Comparing atomic files"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"sql_levels = pd.read_hdf('kurucz_H-Zn_sql.h5', key='levels')\n",
"sql_lines = pd.read_hdf('kurucz_H-Zn_sql.h5', key='lines')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"nosql_levels = pd.read_hdf('kurucz_H-Zn_new2.h5', key='levels')\n",
"nosql_lines = pd.read_hdf('kurucz_H-Zn_new2.h5', key='lines')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((272068, 8), (272068, 8))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of lines is the same\n",
"sql_lines.shape, nosql_lines.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((25221, 3), (25222, 3))"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We have one extra level\n",
"sql_levels.shape, nosql_levels.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/epassaro/miniconda3/envs/carsus/lib/python3.6/site-packages/tqdm/autonotebook/__init__.py:18: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
" \" (e.g. in jupyter console)\", TqdmExperimentalWarning)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ion: (2, 1) Diff: 1\n"
]
}
],
"source": [
"# Extra level comes from He II\n",
"from carsus.util import parse_selected_species\n",
"ions = parse_selected_species('H-Zn')\n",
"\n",
"for ion in ions:\n",
" try:\n",
" if len(nosql_levels.loc[ion]) != len(sql_levels.loc[ion]):\n",
" diff = len(nosql_levels.loc[ion]) - len(sql_levels.loc[ion])\n",
" print('Ion: {0} Diff: {1}'.format(ion, diff))\n",
" \n",
" except (KeyError, AttributeError, TypeError) as e:\n",
" print('No ion {}'.format(ion))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The SQL atomic file doesn't include ground level for `He II`"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>energy</th>\n",
" <th>g</th>\n",
" <th>metastable</th>\n",
" </tr>\n",
" <tr>\n",
" <th>level_number</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>40.813028</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>40.813086</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>40.813754</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>48.371294</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>48.371312</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>48.371509</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>48.371509</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>48.371581</td>\n",
" <td>6</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" energy g metastable\n",
"level_number \n",
"0 40.813028 2 True\n",
"1 40.813086 2 True\n",
"2 40.813754 4 True\n",
"3 48.371294 2 False\n",
"4 48.371312 2 False\n",
"5 48.371509 4 False\n",
"6 48.371509 4 False\n",
"7 48.371581 6 False"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sql_levels.loc[(2,1)] # No ground level"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>energy</th>\n",
" <th>g</th>\n",
" <th>metastable</th>\n",
" </tr>\n",
" <tr>\n",
" <th>level_number</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.000000</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>40.813028</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>40.813086</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>40.813754</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>48.371294</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>48.371312</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>48.371509</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>48.371509</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>48.371581</td>\n",
" <td>6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" energy g metastable\n",
"level_number \n",
"0 0.000000 2 True\n",
"1 40.813028 2 True\n",
"2 40.813086 2 True\n",
"3 40.813754 4 False\n",
"4 48.371294 2 False\n",
"5 48.371312 2 False\n",
"6 48.371509 4 False\n",
"7 48.371509 4 False\n",
"8 48.371581 6 True"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nosql_levels.loc[(2,1)] # Ground level"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This happens because the SQL code from `carsus/io/output/tardis_.py` lines `389-435` can't handle the case when an ion exists in Kurucz but the ground state is missing. Checking carefully `gfall.dat` with `GFALLReader` we can confirm there's no ground state for `He II` there:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[\u001b[1mcarsus.io.kurucz.gfall\u001b[0m][\u001b[1;33mWARNING\u001b[0m] A specific combination to identify unique levels from the gfall data has not been given. Defaulting to [\"energy\", \"j\"]. (\u001b[1mgfall.py\u001b[0m:68)\n"
]
}
],
"source": [
"from carsus.io.kurucz import GFALLReader\n",
"\n",
"gfall_reader = GFALLReader('./gfall.dat')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[\u001b[1mcarsus.io.kurucz.gfall\u001b[0m][\u001b[1;37mINFO\u001b[0m ] Parsing GFALL ./gfall.dat (\u001b[1mgfall.py\u001b[0m:116)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>energy</th>\n",
" <th>j</th>\n",
" <th>label</th>\n",
" <th>method</th>\n",
" </tr>\n",
" <tr>\n",
" <th>level_index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>329179.275</td>\n",
" <td>0.5</td>\n",
" <td>2P 2P</td>\n",
" <td>meas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>329179.744</td>\n",
" <td>0.5</td>\n",
" <td>2S 2S</td>\n",
" <td>meas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>329185.132</td>\n",
" <td>1.5</td>\n",
" <td>2P 2P</td>\n",
" <td>meas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>390140.803</td>\n",
" <td>0.5</td>\n",
" <td>3P 2P 164</td>\n",
" <td>meas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>390140.942</td>\n",
" <td>0.5</td>\n",
" <td>3S 2S 164</td>\n",
" <td>meas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>390142.535</td>\n",
" <td>1.5</td>\n",
" <td>3D 2D 164</td>\n",
" <td>meas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>390142.538</td>\n",
" <td>1.5</td>\n",
" <td>3P 2P 164</td>\n",
" <td>meas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>390143.114</td>\n",
" <td>2.5</td>\n",
" <td>3D 2D 164</td>\n",
" <td>meas</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" energy j label method\n",
"level_index \n",
"0 329179.275 0.5 2P 2P meas\n",
"1 329179.744 0.5 2S 2S meas\n",
"2 329185.132 1.5 2P 2P meas\n",
"3 390140.803 0.5 3P 2P 164 meas\n",
"4 390140.942 0.5 3S 2S 164 meas\n",
"5 390142.535 1.5 3D 2D 164 meas\n",
"6 390142.538 1.5 3P 2P 164 meas\n",
"7 390143.114 2.5 3D 2D 164 meas"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gfall_reader.levels.loc[(2,1)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**IMHO**, the new code handles the situation just right."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## A more comprehensive check\n",
"\n",
"This check ensures all values in DataFrames are identical!"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(2, 1)\n"
]
}
],
"source": [
"for ion in ions:\n",
" k = sql_levels.loc[ion].eq(nosql_levels.loc[ion]).sum().sum()\n",
" if len(sql_levels.loc[ion])*3 != k:\n",
" print(ion)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(2, 1)\n"
]
}
],
"source": [
"for ion in ions:\n",
" try:\n",
" k = sql_lines.loc[ion].eq(nosql_lines.loc[ion]).sum().sum()\n",
" if len(sql_lines.loc[ion])*8 != k:\n",
" print(ion)\n",
" \n",
" except (KeyError, TypeError, ValueError):\n",
" pass"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Works really good."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment