Last active
August 22, 2019 18:18
-
-
Save epassaro/df20d080368a92ae2fc0930b3fba656d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Comparing atomic files" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sql_levels = pd.read_hdf('kurucz_H-Zn_sql.h5', key='levels')\n", | |
"sql_lines = pd.read_hdf('kurucz_H-Zn_sql.h5', key='lines')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nosql_levels = pd.read_hdf('kurucz_H-Zn_new2.h5', key='levels')\n", | |
"nosql_lines = pd.read_hdf('kurucz_H-Zn_new2.h5', key='lines')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((272068, 8), (272068, 8))" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Number of lines is the same\n", | |
"sql_lines.shape, nosql_lines.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((25221, 3), (25222, 3))" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# The no-SQL file has one extra level compared to the SQL file\n", | |
"sql_levels.shape, nosql_levels.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/epassaro/miniconda3/envs/carsus/lib/python3.6/site-packages/tqdm/autonotebook/__init__.py:18: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", | |
" \" (e.g. in jupyter console)\", TqdmExperimentalWarning)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Ion: (2, 1) Diff: 1\n" | |
] | |
} | |
], | |
"source": [ | |
"# Extra level comes from He II\n", | |
"from carsus.util import parse_selected_species\n", | |
"ions = parse_selected_species('H-Zn')\n", | |
"\n", | |
"for ion in ions:\n", | |
" try:\n", | |
" if len(nosql_levels.loc[ion]) != len(sql_levels.loc[ion]):\n", | |
" diff = len(nosql_levels.loc[ion]) - len(sql_levels.loc[ion])\n", | |
" print('Ion: {0} Diff: {1}'.format(ion, diff))\n", | |
" \n", | |
" except (KeyError, AttributeError, TypeError) as e:\n", | |
" print('No ion {}'.format(ion))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The SQL atomic file doesn't include the ground level for `He II`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>energy</th>\n", | |
" <th>g</th>\n", | |
" <th>metastable</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>level_number</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>40.813028</td>\n", | |
" <td>2</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>40.813086</td>\n", | |
" <td>2</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>40.813754</td>\n", | |
" <td>4</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>48.371294</td>\n", | |
" <td>2</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>48.371312</td>\n", | |
" <td>2</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>48.371509</td>\n", | |
" <td>4</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>48.371509</td>\n", | |
" <td>4</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>48.371581</td>\n", | |
" <td>6</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" energy g metastable\n", | |
"level_number \n", | |
"0 40.813028 2 True\n", | |
"1 40.813086 2 True\n", | |
"2 40.813754 4 True\n", | |
"3 48.371294 2 False\n", | |
"4 48.371312 2 False\n", | |
"5 48.371509 4 False\n", | |
"6 48.371509 4 False\n", | |
"7 48.371581 6 False" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sql_levels.loc[(2,1)] # No ground level" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>energy</th>\n", | |
" <th>g</th>\n", | |
" <th>metastable</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>level_number</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>2</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>40.813028</td>\n", | |
" <td>2</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>40.813086</td>\n", | |
" <td>2</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>40.813754</td>\n", | |
" <td>4</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>48.371294</td>\n", | |
" <td>2</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>48.371312</td>\n", | |
" <td>2</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>48.371509</td>\n", | |
" <td>4</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>48.371509</td>\n", | |
" <td>4</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>48.371581</td>\n", | |
" <td>6</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" energy g metastable\n", | |
"level_number \n", | |
"0 0.000000 2 True\n", | |
"1 40.813028 2 True\n", | |
"2 40.813086 2 True\n", | |
"3 40.813754 4 False\n", | |
"4 48.371294 2 False\n", | |
"5 48.371312 2 False\n", | |
"6 48.371509 4 False\n", | |
"7 48.371509 4 False\n", | |
"8 48.371581 6 True" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nosql_levels.loc[(2,1)] # Ground level" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This happens because the SQL code in `carsus/io/output/tardis_.py` (lines `389-435`) can't handle the case where an ion exists in Kurucz but its ground state is missing. Carefully checking `gfall.dat` with `GFALLReader`, we can confirm that there is no ground state for `He II` there:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[\u001b[1mcarsus.io.kurucz.gfall\u001b[0m][\u001b[1;33mWARNING\u001b[0m] A specific combination to identify unique levels from the gfall data has not been given. Defaulting to [\"energy\", \"j\"]. (\u001b[1mgfall.py\u001b[0m:68)\n" | |
] | |
} | |
], | |
"source": [ | |
"from carsus.io.kurucz import GFALLReader\n", | |
"\n", | |
"gfall_reader = GFALLReader('./gfall.dat')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[\u001b[1mcarsus.io.kurucz.gfall\u001b[0m][\u001b[1;37mINFO\u001b[0m ] Parsing GFALL ./gfall.dat (\u001b[1mgfall.py\u001b[0m:116)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>energy</th>\n", | |
" <th>j</th>\n", | |
" <th>label</th>\n", | |
" <th>method</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>level_index</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>329179.275</td>\n", | |
" <td>0.5</td>\n", | |
" <td>2P 2P</td>\n", | |
" <td>meas</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>329179.744</td>\n", | |
" <td>0.5</td>\n", | |
" <td>2S 2S</td>\n", | |
" <td>meas</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>329185.132</td>\n", | |
" <td>1.5</td>\n", | |
" <td>2P 2P</td>\n", | |
" <td>meas</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>390140.803</td>\n", | |
" <td>0.5</td>\n", | |
" <td>3P 2P 164</td>\n", | |
" <td>meas</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>390140.942</td>\n", | |
" <td>0.5</td>\n", | |
" <td>3S 2S 164</td>\n", | |
" <td>meas</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>390142.535</td>\n", | |
" <td>1.5</td>\n", | |
" <td>3D 2D 164</td>\n", | |
" <td>meas</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>390142.538</td>\n", | |
" <td>1.5</td>\n", | |
" <td>3P 2P 164</td>\n", | |
" <td>meas</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>390143.114</td>\n", | |
" <td>2.5</td>\n", | |
" <td>3D 2D 164</td>\n", | |
" <td>meas</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" energy j label method\n", | |
"level_index \n", | |
"0 329179.275 0.5 2P 2P meas\n", | |
"1 329179.744 0.5 2S 2S meas\n", | |
"2 329185.132 1.5 2P 2P meas\n", | |
"3 390140.803 0.5 3P 2P 164 meas\n", | |
"4 390140.942 0.5 3S 2S 164 meas\n", | |
"5 390142.535 1.5 3D 2D 164 meas\n", | |
"6 390142.538 1.5 3P 2P 164 meas\n", | |
"7 390143.114 2.5 3D 2D 164 meas" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gfall_reader.levels.loc[(2,1)]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**IMHO**, the new code handles the situation just right." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## A more comprehensive check\n", | |
"\n", | |
"This check compares every value in the two DataFrames, ion by ion, and prints each ion whose values are not all identical." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(2, 1)\n" | |
] | |
} | |
], | |
"source": [ | |
"for ion in ions:\n", | |
" k = sql_levels.loc[ion].eq(nosql_levels.loc[ion]).sum().sum()\n", | |
" if len(sql_levels.loc[ion])*3 != k:\n", | |
" print(ion)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(2, 1)\n" | |
] | |
} | |
], | |
"source": [ | |
"for ion in ions:\n", | |
" try:\n", | |
" k = sql_lines.loc[ion].eq(nosql_lines.loc[ion]).sum().sum()\n", | |
" if len(sql_lines.loc[ion])*8 != k:\n", | |
" print(ion)\n", | |
" \n", | |
" except (KeyError, TypeError, ValueError):\n", | |
" pass" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Works really well: both checks flag only `(2, 1)` (`He II`), the ion with the extra ground level." | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.7" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"state": {}, | |
"version_major": 2, | |
"version_minor": 0 | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment