Skip to content

Instantly share code, notes, and snippets.

@onurmatik
Last active July 1, 2022 05:55
Show Gist options
  • Save onurmatik/a20ad48ba56b4583ae82ece35535a312 to your computer and use it in GitHub Desktop.
Save onurmatik/a20ad48ba56b4583ae82ece35535a312 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "b5054d07-1454-434b-bce4-0f5d5fe9065f",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from lxml import etree"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "1e6b594d-7181-4b82-97ff-dce1e80681ef",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>price</th>\n",
" <th>description</th>\n",
" <th>calories</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Belgian Waffles</td>\n",
" <td>$5.95</td>\n",
" <td>Two of our famous Belgian Waffles with plenty ...</td>\n",
" <td>650</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Strawberry Belgian Waffles</td>\n",
" <td>$7.95</td>\n",
" <td>Light Belgian waffles covered with strawberrie...</td>\n",
" <td>900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Berry-Berry Belgian Waffles</td>\n",
" <td>$8.95</td>\n",
" <td>Light Belgian waffles covered with an assortme...</td>\n",
" <td>900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>French Toast</td>\n",
" <td>$4.50</td>\n",
" <td>Thick slices made from our homemade sourdough ...</td>\n",
" <td>600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Homestyle Breakfast</td>\n",
" <td>$6.95</td>\n",
" <td>Two eggs, bacon or sausage, toast, and our eve...</td>\n",
" <td>950</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name price \\\n",
"0 Belgian Waffles $5.95 \n",
"1 Strawberry Belgian Waffles $7.95 \n",
"2 Berry-Berry Belgian Waffles $8.95 \n",
"3 French Toast $4.50 \n",
"4 Homestyle Breakfast $6.95 \n",
"\n",
" description calories \n",
"0 Two of our famous Belgian Waffles with plenty ... 650 \n",
"1 Light Belgian waffles covered with strawberrie... 900 \n",
"2 Light Belgian waffles covered with an assortme... 900 \n",
"3 Thick slices made from our homemade sourdough ... 600 \n",
"4 Two eggs, bacon or sausage, toast, and our eve... 950 "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_excel('sample.xlsx')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "f29e4e26-c66c-4ae9-8914-881dc6657905",
"metadata": {},
"outputs": [],
"source": [
"df.to_xml('out.xml', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "9298e0ae-9473-4fcd-a31c-fe17592fec3b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"b'<data>\\n <row>\\n <name>Belgian Waffles</name>\\n <price>$5.95</price>\\n <description>Two of our famous Belgian Waffles with plenty of real maple syrup</description>\\n <calories>650</calories>\\n </row>\\n <row>\\n <name>Strawberry Belgian Waffles</name>\\n <price>$7.95</price>\\n <description>Light Belgian waffles covered with strawberries and whipped cream</description>\\n <calories>900</calories>\\n </row>\\n <row>\\n <name>Berry-Berry Belgian Waffles</name>\\n <price>$8.95</price>\\n <description>Light Belgian waffles covered with an assortment of fresh berries and whipped cream</description>\\n <calories>900</calories>\\n </row>\\n <row>\\n <name>French Toast</name>\\n <price>$4.50</price>\\n <description>Thick slices made from our homemade sourdough bread</description>\\n <calories>600</calories>\\n </row>\\n <row>\\n <name>Homestyle Breakfast</name>\\n <price>$6.95</price>\\n <description>Two eggs, bacon or sausage, toast, and our ever-popular hash browns</description>\\n <calories>950</calories>\\n </row>\\n</data>\\n'"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xml = etree.parse('out.xml')\n",
"etree.tostring(xml, pretty_print=True)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "6ed993b5-3253-40fe-aa4c-da2bb3320e2f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"validator = etree.XMLSchema(file=\"sample.xsd\")\n",
"validator.validate(xml)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "highwire-stats-venv",
"language": "python",
"name": "highwire-stats-venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@onurmatik
Copy link
Author

onurmatik commented Jul 1, 2022

The XSD document used for validation in the notebook (`sample.xsd`) is linked here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment