Skip to content

Instantly share code, notes, and snippets.

@ColinTalbert
Created January 16, 2018 15:08
Show Gist options
  • Save ColinTalbert/f3af9904b0a48ebd4bfa906757b4ba7d to your computer and use it in GitHub Desktop.
Save ColinTalbert/f3af9904b0a48ebd4bfa906757b4ba7d to your computer and use it in GitHub Desktop.
eainfo to csv Example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Demo of how to use the MetadataWizard to convert the detailed (eainfo) section of a metadata record to a CSV"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pymdwizard.core import xml_utils"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Local path to the XML metadata record used in this demo; point it at your own file.\n",
"fname = (\n",
"    r\"C:\\Users\\talbertc\\Downloads\\Woody riparian invasion in Colorado.xml\"\n",
")\n",
"md = xml_utils.XMLRecord(fname)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### There's a lot of variability in how a detailed section can be structured. \n",
"This won't work in all cases (multiple domain types in a single attribute, etc.)."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Column order of the output table; each entry in `rows` follows this order.\n",
"cols = ['attrlabl', 'attrdef', 'attrdomv', 'values']\n",
"\n",
"rows = []\n",
"for attr in md.metadata.eainfo.detailed.attr:\n",
"    attrlabl = attr.attrlabl.text\n",
"    attrdef = attr.attrdef.text\n",
"\n",
"    # An attribute can hold several <attrdomv> elements (one per enumerated value).\n",
"    # The child tag of the first one decides how the whole attribute is summarized.\n",
"    domains = attr.xpath('attrdomv', as_list=True)\n",
"    attrdomv = domains[0].children[0].tag\n",
"\n",
"    if attrdomv == \"rdom\":\n",
"        # Range domain: report it as \"min|max\".\n",
"        minvalue = attr.attrdomv.rdom.rdommin.text\n",
"        maxvalue = attr.attrdomv.rdom.rdommax.text\n",
"        values = \"|\".join([minvalue, maxvalue])\n",
"    elif attrdomv == \"edom\":\n",
"        # Enumerated domain: join every enumerated value with \"|\".\n",
"        values = \"|\".join([domain.edom.edomv.text for domain in domains])\n",
"    elif attrdomv == \"udom\":\n",
"        # Unrepresentable domain: keep its free-text description.\n",
"        values = attr.attrdomv.udom.text\n",
"    else:\n",
"        # Any other domain type is not summarized here. Use an empty string so the\n",
"        # row never picks up the previous attribute's values or hits a NameError.\n",
"        values = \"\"\n",
"\n",
"    row = [attrlabl, attrdef, attrdomv, values]\n",
"    rows.append(row)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>attrlabl</th>\n",
" <th>attrdef</th>\n",
" <th>attrdomv</th>\n",
" <th>values</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Site ID</td>\n",
" <td>Unique identifier assigned to each site</td>\n",
" <td>udom</td>\n",
" <td>The unique identifier assigned to each site co...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Latitude</td>\n",
" <td>Latitude of the site</td>\n",
" <td>rdom</td>\n",
" <td>33.1483333333|33.1483333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Longitude</td>\n",
" <td>Longitude of the site</td>\n",
" <td>rdom</td>\n",
" <td>101.887972222|101.887972222</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Location description</td>\n",
" <td>Description of the location of the site</td>\n",
" <td>udom</td>\n",
" <td>Descriptions of the site locations are from th...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>River basin</td>\n",
" <td>Major river basin in which the site is located</td>\n",
" <td>edom</td>\n",
" <td>South Platte|upper Arkansas|upper/middle Rio G...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" attrlabl attrdef \\\n",
"0 Site ID Unique identifier assigned to each site \n",
"1 Latitude Latitude of the site \n",
"2 Longitude Longitude of the site \n",
"3 Location description Description of the location of the site \n",
"4 River basin Major river basin in which the site is located \n",
"\n",
" attrdomv values \n",
"0 udom The unique identifier assigned to each site co... \n",
"1 rdom 33.1483333333|33.1483333333 \n",
"2 rdom 101.887972222|101.887972222 \n",
"3 udom Descriptions of the site locations are from th... \n",
"4 edom South Platte|upper Arkansas|upper/middle Rio G... "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Build the table from the collected rows, write it out without the index column, then preview it.\n",
"df = pd.DataFrame.from_records(data=rows, columns=cols)\n",
"df.to_csv(path_or_buf=r\"c:\\temp\\out.csv\", index=False)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment