xflr6/Pandas_read_sparql_query.ipynb

## Pandas_read_sparql_query.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "1d935d67-aab3-49d1-b0e3-d82acd9e3329",
   "metadata": {},
   "source": [
    "# pandas `read_sparql_query()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5ebd1ea3-c44f-4b56-9ab9-58f58620f643",
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import annotations\n",
    "\n",
    "import datetime\n",
    "import decimal\n",
    "import distutils.util\n",
    "import functools\n",
    "import io\n",
    "import logging\n",
    "import types\n",
    "import urllib.parse\n",
    "import urllib.request\n",
    "import warnings\n",
    "import xml.etree.ElementTree as etree\n",
    "\n",
    "import pandas as pd\n",
    "import rdflib\n",
    "import SPARQLWrapper as sparqlwrapper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "01c3b56a-7318-4999-8a5c-a2464921bbbb",
   "metadata": {},
   "outputs": [],
   "source": [
    "ENDPOINT = 'https://query.wikidata.org/sparql'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "32fcfb65-45d5-4467-b88c-e59ea43890a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "QUERY = '''\n",
    "SELECT\n",
    "  ?glottocode\n",
    "  (strafter(str(?languoid), str(wd:)) AS ?qid)\n",
    "  (?languoidLabel AS ?name)\n",
    "  (strafter(str(?siteLink), \"https://en.wikipedia.org/wiki/\") AS ?title)\n",
    "WHERE {\n",
    "  ?languoid wdt:P1394 ?glottocode.\n",
    "  FILTER (REGEX(?glottocode, \"^[a-z0-9]{4}[0-9]{4}$\")).\n",
    "  OPTIONAL {\n",
    "    ?siteLink schema:about ?languoid;\n",
    "              schema:inLanguage \"en\";\n",
    "              schema:isPartOf <https://en.wikipedia.org/>.\n",
    "  }\n",
    "  SERVICE wikibase:label {\n",
    "    bd:serviceParam wikibase:language \"en\".\n",
    "    ?languoid rdfs:label ?languoidLabel.\n",
    "  }\n",
    "}\n",
    "ORDER BY\n",
    "  ?glottocode\n",
    "  xsd:integer(strafter(str(?languoid), str(wd:Q)))\n",
    "LIMIT 15000\n",
    "'''.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4ad0ee4a-44d0-4e24-a2c0-5be14d846795",
   "metadata": {},
   "outputs": [],
   "source": [
    "TEST_QUERY = '''\n",
    "SELECT ?index ?string ?integer ?boolean ?decimal ?double ?float ?datetime ?date ?time\n",
    "WHERE {\n",
    "  VALUES ?index { 0 }\n",
    "  VALUES ?string { \"spam\" \"eggs\"^^xsd:string }\n",
    "  VALUES ?integer { 7 \"42\"^^xsd:integer }\n",
    "  VALUES ?boolean { true false }\n",
    "  VALUES ?decimal { 1.3 }\n",
    "  VALUES ?double { 1.0e6 }\n",
    "  VALUES ?float { \"6.275\"^^xsd:float }\n",
    "  VALUES ?datetime { \"2005-04-04T04:04:04\"^^xsd:dateTime }\n",
    "  VALUES ?date { \"2001-01-01\"^^xsd:date }\n",
    "  VALUES ?time { \"18:30\"^^xsd:time }\n",
    "}\n",
    "'''.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "beb198ed-6fc0-4a53-ba57-49f4cb9c060d",
   "metadata": {},
   "outputs": [],
   "source": [
    "LOGLEVEL = logging.INFO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3bea886d-f8e3-49e0-9b9a-8113f33f6035",
   "metadata": {},
   "outputs": [],
   "source": [
    "logging.basicConfig(format='[%(levelname)s@%(name)s] %(message)s', level=LOGLEVEL, force=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "86aede62-2837-4013-9801-3778a6c1a21b",
   "metadata": {},
   "outputs": [],
   "source": [
    "class QueryPandas:\n",
    "\n",
    "    def __init__(self, endpoint: str, **kwargs) -> None:\n",
    "        super().__init__(**kwargs)\n",
    "        self.endpoint=endpoint\n",
    "\n",
    "    def __repr__(self) -> str:\n",
    "        return f'{self.__class__.__name__}({self.endpoint!r})'\n",
    "    \n",
    "    def query_pandas(self, sparql: str, **kwargs) -> pd.DataFrame:\n",
    "        result = self.query(sparql)\n",
    "\n",
    "        df = self.read_sparql_query_result(result, **kwargs)\n",
    "\n",
    "        with io.StringIO() as f:\n",
    "            df.info(buf=f, memory_usage='deep')\n",
    "            logging.debug(f.getvalue())\n",
    "\n",
    "        return df\n",
    "\n",
    "    def query(self, sparql: str):\n",
    "        raise NotImplementedError        \n",
    "    \n",
    "    def read_sparql_query_result(self, result, **kwargs) -> pd.DataFrame:\n",
    "        raise NotImplementedError"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "44f9aadc-8d2c-4a59-a5b7-f58cc14db1d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def pd_dataframe_from_records(records, **kwargs) -> pd.DataFrame:\n",
    "    kwargs.setdefault('coerce_float', True)\n",
    "    logging.info('pandas.DataFrame.from_records(%r, **%r)', records, kwargs)\n",
    "    return pd.DataFrame.from_records(records, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "a96dcb4d-ae5a-4b84-b0dd-5ea6bfc859c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def pd_read_csv(data, **kwargs) -> pd.DataFrame:\n",
    "    kwargs.setdefault('na_values', '')\n",
    "    kwargs.setdefault('keep_default_na', False)\n",
    "\n",
    "    if kwargs.get('encoding') is None:\n",
    "        content_type = data.info()['content-type']\n",
    "        kwargs['encoding'] = get_encoding(content_type)\n",
    "    logging.debug('encoding: %r', kwargs['encoding'])\n",
    "\n",
    "    logging.info('pandas.read_csv(%r, **%r)', data, kwargs)\n",
    "    return pd.read_csv(data, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "3366b708-968f-4e85-8c25-6cb0819e0ab2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_encoding(content_type: str) -> str: \n",
    "    _, sep, encoding = content_type.partition(';charset=')\n",
    "    assert sep and encoding\n",
    "    return encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "bea6da0c-6b5e-4998-b97c-002af7e41e89",
   "metadata": {},
   "outputs": [],
   "source": [
    "def pd_json_normalize(jsondata, **kwargs) -> pd.DataFrame:\n",
    "    kwargs.setdefault('record_path', ['results', 'bindings'])\n",
    "    logging.info('pandas.json_normalize(jsondata, **%r)', kwargs)\n",
    "    return pd.json_normalize(jsondata, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9f0ca230-7848-4ec1-92b4-70389cfcca02",
   "metadata": {},
   "outputs": [],
   "source": [
    "def pipe_info(df) -> pd.DataFrame:\n",
    "    df.info(memory_usage='deep')\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "be2d9827-0273-462c-b712-7d9d276694ee",
   "metadata": {},
   "source": [
    "## urllib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "03b875f6-397f-41a6-9479-d9889013b66e",
   "metadata": {},
   "outputs": [],
   "source": [
    "class QueryUrllib(QueryPandas):\n",
    "\n",
    "    method = 'GET'\n",
    "    \n",
    "    headers = None\n",
    "    \n",
    "    def query(self, sparql: str, *, method=None, headers=None):\n",
    "        logging.info('endpoint: %r', self.endpoint)\n",
    "        request_url = (urllib.parse.urlparse(self.endpoint)\n",
    "                       ._replace(query=urllib.parse.urlencode({'query': sparql})))\n",
    "\n",
    "        request = urllib.request.Request(request_url.geturl(),\n",
    "                                         method=method if method is not None else self.method,\n",
    "                                         headers=headers if headers is not None else self.headers)\n",
    "        logging.info('request: %r %r', request.method, request)\n",
    "        logging.debug('url: %r', request.full_url)\n",
    "\n",
    "        response = urllib.request.urlopen(request)\n",
    "        logging.info('response: %r %r', response.code, response)\n",
    "        logging.info('content-type: %r', response.headers['content-type'])\n",
    "        logging.debug('headers: %r', dict(response.headers))\n",
    "\n",
    "        return types.SimpleNamespace(response=response, headers=response.headers, info=response.info)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4c85659a-a85d-4cd1-8d41-74346c44106e",
   "metadata": {
    "tags": []
   },
   "source": [
    "### ``CsvUrllibStrategy``"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "e10dfe89-4693-4409-8993-7e73f95c2718",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_sparql_query_csv(result, **kwargs) -> pd.DataFrame:\n",
    "    return pd_read_csv(result.response, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "aebf75f8-9a76-400a-8b03-af6f232816a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "class CsvUrllibStrategy(QueryUrllib):\n",
    "\n",
    "    headers = {'Accept': 'text/csv'}\n",
    "\n",
    "    read_sparql_query_result = staticmethod(read_sparql_query_csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "a6b910e8-b162-4922-81ca-30dcf2711208",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] request: 'GET' <urllib.request.Request object at 0x000001E1EEB98F40>\n",
      "[INFO@root] response: 200 <http.client.HTTPResponse object at 0x000001E1EEB9A590>\n",
      "[INFO@root] content-type: 'text/csv;charset=utf-8'\n",
      "[INFO@root] pandas.read_csv(<http.client.HTTPResponse object at 0x000001E1EEB9A590>, **{'index_col': 'index', 'na_values': '', 'keep_default_na': False, 'encoding': 'utf-8'})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 8 entries, 0 to 0\n",
      "Data columns (total 9 columns):\n",
      " #   Column    Non-Null Count  Dtype         \n",
      "---  ------    --------------  -----         \n",
      " 0   string    8 non-null      object        \n",
      " 1   integer   8 non-null      int64         \n",
      " 2   boolean   8 non-null      bool          \n",
      " 3   decimal   8 non-null      float64       \n",
      " 4   double    8 non-null      float64       \n",
      " 5   float     8 non-null      float64       \n",
      " 6   datetime  8 non-null      datetime64[ns]\n",
      " 7   date      8 non-null      object        \n",
      " 8   time      8 non-null      object        \n",
      "dtypes: bool(1), datetime64[ns](1), float64(3), int64(1), object(3)\n",
      "memory usage: 1.9 KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>string</th>\n",
       "      <th>integer</th>\n",
       "      <th>boolean</th>\n",
       "      <th>decimal</th>\n",
       "      <th>double</th>\n",
       "      <th>float</th>\n",
       "      <th>datetime</th>\n",
       "      <th>date</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       string integer boolean decimal     double  float  \\\n",
       "index                                                     \n",
       "0      'spam'       7    True     1.3  1000000.0  6.275   \n",
       "0      'spam'       7   False     1.3  1000000.0  6.275   \n",
       "0      'spam'      42    True     1.3  1000000.0  6.275   \n",
       "0      'spam'      42   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42   False     1.3  1000000.0  6.275   \n",
       "\n",
       "                               datetime          date     time  \n",
       "index                                                           \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(CsvUrllibStrategy(ENDPOINT).query_pandas(TEST_QUERY, index_col='index')\n",
    " .astype({'datetime': 'datetime64'}).pipe(pipe_info).applymap(repr))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "911ce4dd-3ea8-4cb7-b552-0c3a21b2e2eb",
   "metadata": {},
   "source": [
    "### ``XmlUrllibStrategy``"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "b915473b-14f1-4bd6-841c-56b1bce34192",
   "metadata": {},
   "outputs": [],
   "source": [
    "SPARQL_RESULTS = 'http://www.w3.org/2005/sparql-results#'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "4e57a676-dd62-4bc3-94df-0c6c13db6f62",
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_ns(tag) -> str:\n",
    "    ns = tag.partition('{')[2].partition('}')[0]\n",
    "    assert tag.startswith(f'{{{ns}}}')\n",
    "    return ns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "87447a7e-d491-4008-ae06-00433b93dedb",
   "metadata": {},
   "outputs": [],
   "source": [
    "NS = f'{{{SPARQL_RESULTS}}}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "ff74fd77-d64e-4434-9eaf-488b3f75069d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_sparql_query_xml(result, *, encoding=None, **kwargs) -> pd.DataFrame:\n",
    "    if encoding is None:\n",
    "        content_type = result.info()['content-type']\n",
    "        encoding = get_encoding(content_type)\n",
    "    logging.debug('encoding: %r', encoding)\n",
    "\n",
    "    pairs = etree.iterparse(result.response, events=('start', 'end'),\n",
    "                            parser=etree.XMLParser(encoding=encoding))\n",
    "    _, root = next(pairs)\n",
    "    logging.info('xml: %r', root)\n",
    "\n",
    "    ns = extract_ns(root.tag)\n",
    "    if ns != SPARQL_RESULTS:\n",
    "        raise ValueError(f'error: unknown xml namespace {ns!r} (expected: {SPARQL_RESULTS!r})')\n",
    "    ns = f'{{{ns}}}'\n",
    "\n",
    "    sparql_root = f'{ns}sparql'\n",
    "    if root.tag != sparql_root:\n",
    "        raise ValueError(f'error: invalid xml root tag {root.tag!r} (expected: {sparql_root!r})')\n",
    "\n",
    "    sparql_head = f'{ns}head'\n",
    "    head = next(elem for event, elem in pairs if event == 'end' and elem.tag == sparql_head)\n",
    "    variables = [variable.attrib['name'] for variable in head.findall(f'{ns}variable')]\n",
    "    kwargs.setdefault('columns', variables)\n",
    "\n",
    "    records = iterrecords(root, pairs, variables=variables)\n",
    "    return pd_dataframe_from_records(records, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "5d3903f1-b6a8-4049-a079-15fe856357ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "def iterrecords(root, pairs, *, variables, sparql_result: str = f'{NS}result'):\n",
    "    sparql_values = [f'{NS}binding[@name=\"{name}\"]/' for name in variables]\n",
    "    for event, elem in pairs:\n",
    "        if event != 'end':\n",
    "            pass\n",
    "        elif elem.tag == sparql_result:        \n",
    "            value_elements = map(elem.find, sparql_values)\n",
    "            yield tuple(itervalues(value_elements))\n",
    "            root.clear()\n",
    "        elif elem.tag == root.tag:\n",
    "            assert next(pairs, None) is None\n",
    "            return"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "32b4e966-a4dd-4d83-8ba3-7f2a93cb844e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def itervalues(value_elements, *,\n",
    "               sparql_literal: str = f'{NS}literal',\n",
    "               sparql_uri: str = f'{NS}uri',\n",
    "               sparql_bnode: str = f'{NS}bnode',):\n",
    "    for value_elem in value_elements:\n",
    "        if value_elem is None:\n",
    "            yield None\n",
    "            continue\n",
    "\n",
    "        if value_elem.tag not in (sparql_literal, sparql_uri, sparql_bnode):\n",
    "            raise ValueError(f'invalid binding value tag: {value_elem.tag!r}')\n",
    "        value = value_elem.text\n",
    "        if value_elem.tag == sparql_literal and 'datatype' in value_elem.attrib:\n",
    "            datatype = value_elem.attrib['datatype']\n",
    "            if datatype is not None:\n",
    "                try:\n",
    "                    parse_value = PARSE_FUNC[datatype]\n",
    "                except KeyError:\n",
    "                    warnings.warn(f'cannot convert unknown datatype: {datatype!r}')\n",
    "                else:\n",
    "                    value = parse_value(value)\n",
    "        yield value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "0b4f1fab-4f46-4c6b-b107-bffc6388f775",
   "metadata": {},
   "outputs": [],
   "source": [
    "XSD = 'http://www.w3.org/2001/XMLSchema#'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "2d68fcef-ef94-4b3a-9dc5-de49b212cce0",
   "metadata": {},
   "outputs": [],
   "source": [
    "PARSE_FUNC = {f'{XSD}string': lambda x: x,\n",
    "              f'{XSD}integer': int,\n",
    "              f'{XSD}boolean': lambda x: bool(distutils.util.strtobool(x)),\n",
    "              f'{XSD}decimal': decimal.Decimal,\n",
    "              f'{XSD}double': float,\n",
    "              f'{XSD}float': float,\n",
    "              f'{XSD}dateTime': datetime.datetime.fromisoformat,\n",
    "              f'{XSD}date': datetime.date.fromisoformat,\n",
    "              f'{XSD}time': datetime.time.fromisoformat}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "29bf61ef-0328-478a-a877-30fbd45eeeca",
   "metadata": {},
   "outputs": [],
   "source": [
    "class XmlUrllibStrategy(QueryUrllib):\n",
    "\n",
    "    headers = {'Accept': 'application/sparql-results+xml'}\n",
    "\n",
    "    read_sparql_query_result = staticmethod(read_sparql_query_xml)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "95d5889f-e830-4b68-839d-5558d8fcc959",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] request: 'GET' <urllib.request.Request object at 0x000001E1EEC03C10>\n",
      "[INFO@root] response: 200 <http.client.HTTPResponse object at 0x000001E1EEC01F60>\n",
      "[INFO@root] content-type: 'application/sparql-results+xml;charset=utf-8'\n",
      "[INFO@root] xml: <Element '{http://www.w3.org/2005/sparql-results#}sparql' at 0x000001E1EEC3DB70>\n",
      "[INFO@root] pandas.DataFrame.from_records(<generator object iterrecords at 0x000001E1EEBABBC0>, **{'index': 'index', 'columns': ['index', 'string', 'integer', 'boolean', 'decimal', 'double', 'float', 'datetime', 'date', 'time'], 'coerce_float': True})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 8 entries, 0 to 0\n",
      "Data columns (total 9 columns):\n",
      " #   Column    Non-Null Count  Dtype         \n",
      "---  ------    --------------  -----         \n",
      " 0   string    8 non-null      object        \n",
      " 1   integer   8 non-null      int64         \n",
      " 2   boolean   8 non-null      bool          \n",
      " 3   decimal   8 non-null      float64       \n",
      " 4   double    8 non-null      float64       \n",
      " 5   float     8 non-null      float64       \n",
      " 6   datetime  8 non-null      datetime64[ns]\n",
      " 7   date      8 non-null      object        \n",
      " 8   time      8 non-null      object        \n",
      "dtypes: bool(1), datetime64[ns](1), float64(3), int64(1), object(3)\n",
      "memory usage: 1.5 KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>string</th>\n",
       "      <th>integer</th>\n",
       "      <th>boolean</th>\n",
       "      <th>decimal</th>\n",
       "      <th>double</th>\n",
       "      <th>float</th>\n",
       "      <th>datetime</th>\n",
       "      <th>date</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       string integer boolean decimal     double  float  \\\n",
       "index                                                     \n",
       "0      'spam'       7    True     1.3  1000000.0  6.275   \n",
       "0      'spam'       7   False     1.3  1000000.0  6.275   \n",
       "0      'spam'      42    True     1.3  1000000.0  6.275   \n",
       "0      'spam'      42   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42   False     1.3  1000000.0  6.275   \n",
       "\n",
       "                               datetime                       date  \\\n",
       "index                                                                \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "\n",
       "                        time  \n",
       "index                         \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "XmlUrllibStrategy(ENDPOINT).query_pandas(TEST_QUERY, index='index').pipe(pipe_info).applymap(repr)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a490428a-aeef-4a07-a886-66c992b031e9",
   "metadata": {},
   "source": [
    "## rdflib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "6a78e26c-e9d9-408c-8726-35507a76f3af",
   "metadata": {},
   "outputs": [],
   "source": [
    "class SCHEMA(rdflib.SDO):\n",
    "    \"\"\"https://github.com/RDFLib/rdflib/issues/1120\"\"\"\n",
    "\n",
    "    _NS = rdflib.Namespace(rdflib.SDO._NS.replace('https://', 'http://'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "7919a1c7-e917-4f55-b7e7-fd950226616b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def open_sparql_graph(endpoint: str, *, prefixes={'schema': SCHEMA}):\n",
    "    logging.info('endpoint: %r', endpoint)\n",
    "    graph = rdflib.ConjunctiveGraph('SPARQLStore')\n",
    "    graph.open(endpoint)\n",
    "    logging.info('graph: %s', graph)\n",
    "\n",
    "    logging.info('prefixes: %r', prefixes)\n",
    "    for prefix, namespace in prefixes.items():\n",
    "        graph.namespace_manager.bind(prefix, namespace, replace=True)\n",
    "\n",
    "    logging.debug('namespaces: %r', list(graph.namespaces()))\n",
    "    return graph"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e2a7cf6d-7d0d-471d-b09c-43be532a2eaf",
   "metadata": {},
   "source": [
    "### `rdflib.ConjunctiveGraph('SPARQLStore')`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "3ad3eee6-0a03-4a77-9972-858521ded602",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_sparql_query_graph(result, **kwargs) -> pd.DataFrame:\n",
    "    variables = [v.toPython().removeprefix('?') for v in result.vars]\n",
    "    kwargs.setdefault('columns', variables)\n",
    "    records = ([v.toPython() if v is not None else None for v in values]\n",
    "               for values in result)\n",
    "    return pd_dataframe_from_records(records, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "771a038a-e21d-4415-b11a-720ce570b804",
   "metadata": {},
   "outputs": [],
   "source": [
    "class GraphXmlStrategy(QueryPandas):\n",
    "    \n",
    "    def __repr__(self) -> str:\n",
    "        return f'<{self.__class__.__name__} {self.endpoint!r} graph={self.graph!r}>'\n",
    "\n",
    "    @functools.cached_property\n",
    "    def graph(self):\n",
    "        return open_sparql_graph(self.endpoint)\n",
    "\n",
    "    def query(self, query: str):\n",
    "        result = self.graph.query(query)\n",
    "        logging.info('result: %r', result)\n",
    "        return result\n",
    "\n",
    "    read_sparql_query_result = staticmethod(read_sparql_query_graph)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "e711ce12-c947-4525-b8c5-e0ff656ff970",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] graph: [a rdflib:ConjunctiveGraph;rdflib:storage [a rdflib:Store;rdfs:label 'SPARQLStore']]\n",
      "[INFO@root] prefixes: {'schema': Namespace(\"http://schema.org/\")}\n",
      "[INFO@root] result: <rdflib.plugins.sparql.results.xmlresults.XMLResult object at 0x000001E1EEC99990>\n",
      "[INFO@root] pandas.DataFrame.from_records(<generator object read_sparql_query_graph.<locals>.<genexpr> at 0x000001E1EF0D1310>, **{'index': 'index', 'columns': ['index', 'string', 'integer', 'boolean', 'decimal', 'double', 'float', 'datetime', 'date', 'time'], 'coerce_float': True})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 8 entries, 0 to 0\n",
      "Data columns (total 9 columns):\n",
      " #   Column    Non-Null Count  Dtype         \n",
      "---  ------    --------------  -----         \n",
      " 0   string    8 non-null      object        \n",
      " 1   integer   8 non-null      int64         \n",
      " 2   boolean   8 non-null      bool          \n",
      " 3   decimal   8 non-null      float64       \n",
      " 4   double    8 non-null      float64       \n",
      " 5   float     8 non-null      float64       \n",
      " 6   datetime  8 non-null      datetime64[ns]\n",
      " 7   date      8 non-null      object        \n",
      " 8   time      8 non-null      object        \n",
      "dtypes: bool(1), datetime64[ns](1), float64(3), int64(1), object(3)\n",
      "memory usage: 1.5 KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>string</th>\n",
       "      <th>integer</th>\n",
       "      <th>boolean</th>\n",
       "      <th>decimal</th>\n",
       "      <th>double</th>\n",
       "      <th>float</th>\n",
       "      <th>datetime</th>\n",
       "      <th>date</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       string integer boolean decimal     double  float  \\\n",
       "index                                                     \n",
       "0      'spam'       7    True     1.3  1000000.0  6.275   \n",
       "0      'spam'       7   False     1.3  1000000.0  6.275   \n",
       "0      'spam'      42    True     1.3  1000000.0  6.275   \n",
       "0      'spam'      42   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42   False     1.3  1000000.0  6.275   \n",
       "\n",
       "                               datetime                       date  \\\n",
       "index                                                                \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "\n",
       "                        time  \n",
       "index                         \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "GraphXmlStrategy(ENDPOINT).query_pandas(TEST_QUERY, index='index').pipe(pipe_info).applymap(repr)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6d30940d-4cde-44ee-a767-a98a538dbb08",
   "metadata": {},
   "source": [
    "## SPARQLWrapper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "62fed472-860d-4ea5-bf37-0f93eaebffd8",
   "metadata": {},
   "outputs": [],
   "source": [
    "class QuerySPARQLWrapper:\n",
    "\n",
    "    def __repr__(self) -> str:\n",
    "        return f'{self.__class__.__name__}({self.endpoint!r}, returnFormat={self.returnFormat!r})'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "db4a18f8-d06a-473c-bded-65bfe509eefa",
   "metadata": {},
   "outputs": [],
   "source": [
    "class QueryService(QuerySPARQLWrapper, sparqlwrapper.SPARQLWrapper, QueryPandas):\n",
    "    \"\"\"Query service.\"\"\"\n",
    "\n",
    "    def query(self, sparql: str):\n",
    "        logging.info('endpoint: %r', self.endpoint)\n",
    "        logging.info('returnFormat: %r', self.returnFormat)\n",
    "        if self.returnFormat in (sparqlwrapper.CSV, sparqlwrapper.TSV):\n",
    "            logging.info('%r.setOnlyConnreg(True)', self)\n",
    "            self.setOnlyConneg(True)\n",
    "        elif not self.supportsReturnFormat(self.returnFormat):\n",
    "            raise ValueError('unsupposted return format: %r', self.returnFormat)\n",
    "        self.setQuery(sparql)\n",
    "        result = super().query()\n",
    "        logging.info('result: %r', result)\n",
    "        headers = result.info()\n",
    "        logging.debug('headers: %r', headers)\n",
    "        logging.info('content_type: %r', headers['content-type'])\n",
    "        return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "a3b5dd29-ca50-4088-a568-90bac6d604fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "class JsonReturnFormatQueryService(QuerySPARQLWrapper, sparqlwrapper.SPARQLWrapper2,  QueryPandas):\n",
    "    \"\"\"Query service with JSON return format.\"\"\"\n",
    "    \n",
    "    def query(self, sparql: str):\n",
    "        logging.info('endpoint: %r', self.endpoint)\n",
    "        self.setQuery(sparql)\n",
    "        result = super().query()\n",
    "        logging.info('result: %r', result)\n",
    "        logging.info('result.variables: %r', result.variables)\n",
    "        return result"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca4321ee-3a45-4c36-8a4c-617a13a5140b",
   "metadata": {},
   "source": [
    "### `SPARQLWrapper.SPARQLWrapper(returnFormat='csv')`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "fb82bc0a-8fea-4478-9eb7-8d7af008dbf0",
   "metadata": {},
   "outputs": [],
   "source": [
    "class CsvSwStrategy(QueryService):\n",
    "\n",
    "    def __init__(self, endpoint: str, *, returnFormat=sparqlwrapper.CSV, **kwargs) -> None:\n",
    "        super().__init__(endpoint, returnFormat=returnFormat, **kwargs)\n",
    "\n",
    "    read_sparql_query_result = staticmethod(read_sparql_query_csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "0e59b728-e7dc-426a-8c85-cbd6fdc03c44",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] returnFormat: 'csv'\n",
      "[INFO@root] CsvSwStrategy('https://query.wikidata.org/sparql', returnFormat='csv').setOnlyConnreg(True)\n",
      "[INFO@root] result: <SPARQLWrapper.Wrapper.QueryResult object at 0x000001E1EF0FBB80>\n",
      "[INFO@root] content_type: 'text/csv;charset=utf-8'\n",
      "[INFO@root] pandas.read_csv(<http.client.HTTPResponse object at 0x000001E1EF0FBCD0>, **{'index_col': 'index', 'na_values': '', 'keep_default_na': False, 'encoding': 'utf-8'})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 8 entries, 0 to 0\n",
      "Data columns (total 9 columns):\n",
      " #   Column    Non-Null Count  Dtype         \n",
      "---  ------    --------------  -----         \n",
      " 0   string    8 non-null      object        \n",
      " 1   integer   8 non-null      int64         \n",
      " 2   boolean   8 non-null      bool          \n",
      " 3   decimal   8 non-null      float64       \n",
      " 4   double    8 non-null      float64       \n",
      " 5   float     8 non-null      float64       \n",
      " 6   datetime  8 non-null      datetime64[ns]\n",
      " 7   date      8 non-null      object        \n",
      " 8   time      8 non-null      object        \n",
      "dtypes: bool(1), datetime64[ns](1), float64(3), int64(1), object(3)\n",
      "memory usage: 1.9 KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>string</th>\n",
       "      <th>integer</th>\n",
       "      <th>boolean</th>\n",
       "      <th>decimal</th>\n",
       "      <th>double</th>\n",
       "      <th>float</th>\n",
       "      <th>datetime</th>\n",
       "      <th>date</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       string integer boolean decimal     double  float  \\\n",
       "index                                                     \n",
       "0      'spam'       7    True     1.3  1000000.0  6.275   \n",
       "0      'spam'       7   False     1.3  1000000.0  6.275   \n",
       "0      'spam'      42    True     1.3  1000000.0  6.275   \n",
       "0      'spam'      42   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42   False     1.3  1000000.0  6.275   \n",
       "\n",
       "                               datetime          date     time  \n",
       "index                                                           \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  "
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(CsvSwStrategy(ENDPOINT).query_pandas(TEST_QUERY, index_col='index')\n",
    " .astype({'datetime': 'datetime64'}).pipe(pipe_info).applymap(repr))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "947aa733-b6ff-47cf-a862-d0ab23ac147c",
   "metadata": {},
   "source": [
    "### `SPARQLWrapper.SPARQLWrapper(returnFormat='xml')`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "64ffe095-4c87-4b80-bb93-077c18a39c5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "class XmlSwStrategy(QueryService):\n",
    "\n",
    "    read_sparql_query_result = staticmethod(read_sparql_query_xml)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "bcf515da-e4a7-45e2-b4f0-9c38fba6895b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] returnFormat: 'xml'\n",
      "[INFO@root] result: <SPARQLWrapper.Wrapper.QueryResult object at 0x000001E1EF0FB670>\n",
      "[INFO@root] content_type: 'application/sparql-results+xml;charset=utf-8'\n",
      "[INFO@root] xml: <Element '{http://www.w3.org/2005/sparql-results#}sparql' at 0x000001E1EF12B060>\n",
      "[INFO@root] pandas.DataFrame.from_records(<generator object iterrecords at 0x000001E1EF0D1D20>, **{'index': 'index', 'columns': ['index', 'string', 'integer', 'boolean', 'decimal', 'double', 'float', 'datetime', 'date', 'time'], 'coerce_float': True})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 8 entries, 0 to 0\n",
      "Data columns (total 9 columns):\n",
      " #   Column    Non-Null Count  Dtype         \n",
      "---  ------    --------------  -----         \n",
      " 0   string    8 non-null      object        \n",
      " 1   integer   8 non-null      int64         \n",
      " 2   boolean   8 non-null      bool          \n",
      " 3   decimal   8 non-null      float64       \n",
      " 4   double    8 non-null      float64       \n",
      " 5   float     8 non-null      float64       \n",
      " 6   datetime  8 non-null      datetime64[ns]\n",
      " 7   date      8 non-null      object        \n",
      " 8   time      8 non-null      object        \n",
      "dtypes: bool(1), datetime64[ns](1), float64(3), int64(1), object(3)\n",
      "memory usage: 1.5 KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>string</th>\n",
       "      <th>integer</th>\n",
       "      <th>boolean</th>\n",
       "      <th>decimal</th>\n",
       "      <th>double</th>\n",
       "      <th>float</th>\n",
       "      <th>datetime</th>\n",
       "      <th>date</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       string integer boolean decimal     double  float  \\\n",
       "index                                                     \n",
       "0      'spam'       7    True     1.3  1000000.0  6.275   \n",
       "0      'spam'       7   False     1.3  1000000.0  6.275   \n",
       "0      'spam'      42    True     1.3  1000000.0  6.275   \n",
       "0      'spam'      42   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42   False     1.3  1000000.0  6.275   \n",
       "\n",
       "                               datetime                       date  \\\n",
       "index                                                                \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "\n",
       "                        time  \n",
       "index                         \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  "
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "XmlSwStrategy(ENDPOINT).query_pandas(TEST_QUERY, index='index').pipe(pipe_info).applymap(repr)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b167836d-de6c-4f99-a840-ff05cb41d623",
   "metadata": {},
   "source": [
    "### `SPARQLWrapper.SPARQLWrapper2()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "69ea97b4-8814-45c6-ad0f-aeb27896a59a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_sparql_query_json_result(result, **kwargs) -> pd.DataFrame:\n",
    "    kwargs.setdefault('columns', result.variables)\n",
    "    records = (tuple(itervalues_json(binding, variables=result.variables))\n",
    "               for binding in result.bindings)\n",
    "    return pd_dataframe_from_records(records, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "688cfb61-97ba-4aa4-b794-146c00ff5b2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def itervalues_json(binding, *, variables):\n",
    "    for v in variables:\n",
    "        if v not in binding:\n",
    "            yield None\n",
    "            continue\n",
    "\n",
    "        value_dict = binding[v]\n",
    "        if value_dict.type not in ('literal', 'uri', 'bnode'):\n",
    "            raise ValueError(f'invalid binding value type {b.type!r}')\n",
    "        value = value_dict.value\n",
    "        if value_dict.type == 'literal' and value_dict.datatype is not None:\n",
    "            try:\n",
    "                parse_value = PARSE_FUNC[value_dict.datatype]\n",
    "            except KeyError:\n",
    "                warnings.warn('cannot convert unmatched datatype: {value_dict!r}')\n",
    "            else:\n",
    "                value = parse_value(value)\n",
    "        yield value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "fd10ccbd-65b2-4dcc-8fc5-2ac154ce3833",
   "metadata": {},
   "outputs": [],
   "source": [
    "class JsonResultStrategy(JsonReturnFormatQueryService):\n",
    "\n",
    "    read_sparql_query_result = staticmethod(read_sparql_query_json_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "b22dbc5a-cc0e-405b-ae2e-0ab3cf2e7fc6",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] result: <SPARQLWrapper.SmartWrapper.Bindings object at 0x000001E1EF0FB9D0>\n",
      "[INFO@root] result.variables: ['index', 'string', 'integer', 'boolean', 'decimal', 'double', 'float', 'datetime', 'date', 'time']\n",
      "[INFO@root] pandas.DataFrame.from_records(<generator object read_sparql_query_json_result.<locals>.<genexpr> at 0x000001E1EF0D2180>, **{'index': 'index', 'columns': ['index', 'string', 'integer', 'boolean', 'decimal', 'double', 'float', 'datetime', 'date', 'time'], 'coerce_float': True})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 8 entries, 0 to 0\n",
      "Data columns (total 9 columns):\n",
      " #   Column    Non-Null Count  Dtype         \n",
      "---  ------    --------------  -----         \n",
      " 0   string    8 non-null      object        \n",
      " 1   integer   8 non-null      int64         \n",
      " 2   boolean   8 non-null      bool          \n",
      " 3   decimal   8 non-null      float64       \n",
      " 4   double    8 non-null      float64       \n",
      " 5   float     8 non-null      float64       \n",
      " 6   datetime  8 non-null      datetime64[ns]\n",
      " 7   date      8 non-null      object        \n",
      " 8   time      8 non-null      object        \n",
      "dtypes: bool(1), datetime64[ns](1), float64(3), int64(1), object(3)\n",
      "memory usage: 1.5 KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>string</th>\n",
       "      <th>integer</th>\n",
       "      <th>boolean</th>\n",
       "      <th>decimal</th>\n",
       "      <th>double</th>\n",
       "      <th>float</th>\n",
       "      <th>datetime</th>\n",
       "      <th>date</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>datetime.date(2001, 1, 1)</td>\n",
       "      <td>datetime.time(18, 30)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       string integer boolean decimal     double  float  \\\n",
       "index                                                     \n",
       "0      'spam'       7    True     1.3  1000000.0  6.275   \n",
       "0      'spam'       7   False     1.3  1000000.0  6.275   \n",
       "0      'spam'      42    True     1.3  1000000.0  6.275   \n",
       "0      'spam'      42   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42   False     1.3  1000000.0  6.275   \n",
       "\n",
       "                               datetime                       date  \\\n",
       "index                                                                \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "0      Timestamp('2005-04-04 04:04:04')  datetime.date(2001, 1, 1)   \n",
       "\n",
       "                        time  \n",
       "index                         \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  \n",
       "0      datetime.time(18, 30)  "
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "JsonResultStrategy(ENDPOINT).query_pandas(TEST_QUERY, index='index').pipe(pipe_info).applymap(repr)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d020a3fc-248c-4537-a0ad-17a387272863",
   "metadata": {},
   "source": [
    "### `SPARQLWrapper.SPARQLWrapper(returnFormat='json')`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "650f5ce1-b0c4-497d-9665-42409061e0b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_sparql_query_json_normalize(result, *,  raw: bool = False, **kwargs) -> pd.DataFrame:\n",
    "    jsondata = result.convert()\n",
    "    logging.info(\"result['head']: %r\", jsondata['head'])\n",
    "    return _read_sparql_query_json_normalize(jsondata, raw=raw, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "25aaec36-4cbc-4c7c-8636-413079ecb93c",
   "metadata": {},
   "outputs": [],
   "source": [
    "PARSE_DTYPE = {f'{XSD}string': 'string',\n",
    "               f'{XSD}integer': 'int',\n",
    "               f'{XSD}boolean': 'bool',\n",
    "               f'{XSD}decimal': 'float',\n",
    "               f'{XSD}double': 'float',\n",
    "               f'{XSD}float': 'float',\n",
    "               f'{XSD}dateTime': 'datetime64',\n",
    "               f'{XSD}date': 'string',\n",
    "               f'{XSD}time': 'string'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "aa31d885-1a16-4964-9d33-560a3047bde6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _read_sparql_query_json_normalize(jsondata, *, raw: bool, **kwargs) -> pd.DataFrame:\n",
    "    df = pd_json_normalize(jsondata, **kwargs)\n",
    "    if df.empty:\n",
    "        return df\n",
    "    if not raw:\n",
    "        columns = jsondata['head']['vars']\n",
    "        rename = {f'{c}.value': c for c in columns}\n",
    "        dtype = {name: PARSE_DTYPE[binding['datatype']]\n",
    "                       if binding['type'] == 'literal' and 'datatype' in binding\n",
    "                       else 'string'\n",
    "                 for name, binding in jsondata['results']['bindings'][0].items()}\n",
    "        booleans = [name for name, d in dtype.items() if d in ('bool', 'boolean')]\n",
    "        df = df[list(rename)].rename(rename, axis='columns')\n",
    "        df[booleans] = (df[booleans]\n",
    "                        .replace(r'^[Tt]rue$', True, regex=True)\n",
    "                        .replace(r'^[Ff]alse$', False, regex=True))\n",
    "        return df.astype(dtype)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "b919c3be-569c-4b15-8256-c31598fded50",
   "metadata": {},
   "outputs": [],
   "source": [
    "class JsonNormalizeStrategy(QueryService):\n",
    "\n",
    "    def __init__(self, endpoint, *, returnFormat=sparqlwrapper.JSON, **kwargs) -> None:\n",
    "        super().__init__(endpoint, returnFormat=returnFormat, **kwargs)\n",
    "\n",
    "    read_sparql_query_result = staticmethod(read_sparql_query_json_normalize)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "7069bf4b-c35e-4e13-a901-25a5f90cd7dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] returnFormat: 'json'\n",
      "[INFO@root] result: <SPARQLWrapper.Wrapper.QueryResult object at 0x000001E1EEB99CF0>\n",
      "[INFO@root] content_type: 'application/sparql-results+json;charset=utf-8'\n",
      "[INFO@root] result['head']: {'vars': ['index', 'string', 'integer', 'boolean', 'decimal', 'double', 'float', 'datetime', 'date', 'time']}\n",
      "[INFO@root] pandas.json_normalize(jsondata, **{'record_path': ['results', 'bindings']})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 8 entries, 0 to 0\n",
      "Data columns (total 9 columns):\n",
      " #   Column    Non-Null Count  Dtype         \n",
      "---  ------    --------------  -----         \n",
      " 0   string    8 non-null      string        \n",
      " 1   integer   8 non-null      int32         \n",
      " 2   boolean   8 non-null      bool          \n",
      " 3   decimal   8 non-null      float64       \n",
      " 4   double    8 non-null      float64       \n",
      " 5   float     8 non-null      float64       \n",
      " 6   datetime  8 non-null      datetime64[ns]\n",
      " 7   date      8 non-null      string        \n",
      " 8   time      8 non-null      string        \n",
      "dtypes: bool(1), datetime64[ns](1), float64(3), int32(1), string(3)\n",
      "memory usage: 1.8 KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>string</th>\n",
       "      <th>integer</th>\n",
       "      <th>boolean</th>\n",
       "      <th>decimal</th>\n",
       "      <th>double</th>\n",
       "      <th>float</th>\n",
       "      <th>datetime</th>\n",
       "      <th>date</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'spam'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>7</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>True</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'eggs'</td>\n",
       "      <td>42</td>\n",
       "      <td>False</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>6.275</td>\n",
       "      <td>Timestamp('2005-04-04 04:04:04')</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       string integer boolean decimal     double  float  \\\n",
       "index                                                     \n",
       "0      'spam'       7    True     1.3  1000000.0  6.275   \n",
       "0      'spam'       7   False     1.3  1000000.0  6.275   \n",
       "0      'spam'      42    True     1.3  1000000.0  6.275   \n",
       "0      'spam'      42   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'       7   False     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42    True     1.3  1000000.0  6.275   \n",
       "0      'eggs'      42   False     1.3  1000000.0  6.275   \n",
       "\n",
       "                               datetime          date     time  \n",
       "index                                                           \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  \n",
       "0      Timestamp('2005-04-04 04:04:04')  '2001-01-01'  '18:30'  "
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "JsonNormalizeStrategy(ENDPOINT).query_pandas(TEST_QUERY).set_index('index').pipe(pipe_info).applymap(repr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "ef657e62-7f89-413a-91a8-5bba2b3fefb8",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] returnFormat: 'json'\n",
      "[INFO@root] result: <SPARQLWrapper.Wrapper.QueryResult object at 0x000001E1EEB998D0>\n",
      "[INFO@root] content_type: 'application/sparql-results+json;charset=utf-8'\n",
      "[INFO@root] result['head']: {'vars': ['index', 'string', 'integer', 'boolean', 'decimal', 'double', 'float', 'datetime', 'date', 'time']}\n",
      "[INFO@root] pandas.json_normalize(jsondata, **{'sep': '/', 'record_path': ['results', 'bindings']})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 8 entries, 0 to 0\n",
      "Data columns (total 28 columns):\n",
      " #   Column             Non-Null Count  Dtype \n",
      "---  ------             --------------  ----- \n",
      " 0   index/datatype     8 non-null      object\n",
      " 1   index/type         8 non-null      object\n",
      " 2   string/type        8 non-null      object\n",
      " 3   string/value       8 non-null      object\n",
      " 4   integer/datatype   8 non-null      object\n",
      " 5   integer/type       8 non-null      object\n",
      " 6   integer/value      8 non-null      object\n",
      " 7   boolean/datatype   8 non-null      object\n",
      " 8   boolean/type       8 non-null      object\n",
      " 9   boolean/value      8 non-null      object\n",
      " 10  decimal/datatype   8 non-null      object\n",
      " 11  decimal/type       8 non-null      object\n",
      " 12  decimal/value      8 non-null      object\n",
      " 13  double/datatype    8 non-null      object\n",
      " 14  double/type        8 non-null      object\n",
      " 15  double/value       8 non-null      object\n",
      " 16  float/datatype     8 non-null      object\n",
      " 17  float/type         8 non-null      object\n",
      " 18  float/value        8 non-null      object\n",
      " 19  datetime/datatype  8 non-null      object\n",
      " 20  datetime/type      8 non-null      object\n",
      " 21  datetime/value     8 non-null      object\n",
      " 22  date/datatype      8 non-null      object\n",
      " 23  date/type          8 non-null      object\n",
      " 24  date/value         8 non-null      object\n",
      " 25  time/datatype      8 non-null      object\n",
      " 26  time/type          8 non-null      object\n",
      " 27  time/value         8 non-null      object\n",
      "dtypes: object(28)\n",
      "memory usage: 16.7 KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index/datatype</th>\n",
       "      <th>index/type</th>\n",
       "      <th>string/type</th>\n",
       "      <th>string/value</th>\n",
       "      <th>integer/datatype</th>\n",
       "      <th>integer/type</th>\n",
       "      <th>integer/value</th>\n",
       "      <th>boolean/datatype</th>\n",
       "      <th>boolean/type</th>\n",
       "      <th>boolean/value</th>\n",
       "      <th>...</th>\n",
       "      <th>float/value</th>\n",
       "      <th>datetime/datatype</th>\n",
       "      <th>datetime/type</th>\n",
       "      <th>datetime/value</th>\n",
       "      <th>date/datatype</th>\n",
       "      <th>date/type</th>\n",
       "      <th>date/value</th>\n",
       "      <th>time/datatype</th>\n",
       "      <th>time/type</th>\n",
       "      <th>time/value</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index/value</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'spam'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'7'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#boolean'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'true'</td>\n",
       "      <td>...</td>\n",
       "      <td>'6.275'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#dateTime'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2005-04-04T04:04:04'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#date'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#time'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'spam'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'7'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#boolean'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'false'</td>\n",
       "      <td>...</td>\n",
       "      <td>'6.275'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#dateTime'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2005-04-04T04:04:04'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#date'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#time'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'spam'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'42'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#boolean'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'true'</td>\n",
       "      <td>...</td>\n",
       "      <td>'6.275'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#dateTime'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2005-04-04T04:04:04'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#date'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#time'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'spam'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'42'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#boolean'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'false'</td>\n",
       "      <td>...</td>\n",
       "      <td>'6.275'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#dateTime'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2005-04-04T04:04:04'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#date'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#time'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'eggs'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'7'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#boolean'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'true'</td>\n",
       "      <td>...</td>\n",
       "      <td>'6.275'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#dateTime'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2005-04-04T04:04:04'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#date'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#time'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'eggs'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'7'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#boolean'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'false'</td>\n",
       "      <td>...</td>\n",
       "      <td>'6.275'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#dateTime'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2005-04-04T04:04:04'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#date'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#time'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'eggs'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'42'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#boolean'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'true'</td>\n",
       "      <td>...</td>\n",
       "      <td>'6.275'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#dateTime'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2005-04-04T04:04:04'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#date'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#time'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'eggs'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#integer'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'42'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#boolean'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'false'</td>\n",
       "      <td>...</td>\n",
       "      <td>'6.275'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#dateTime'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2005-04-04T04:04:04'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#date'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'2001-01-01'</td>\n",
       "      <td>'http://www.w3.org/2001/XMLSchema#time'</td>\n",
       "      <td>'literal'</td>\n",
       "      <td>'18:30'</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         index/datatype index/type  \\\n",
       "index/value                                                          \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'  'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'  'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'  'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'  'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'  'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'  'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'  'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'  'literal'   \n",
       "\n",
       "            string/type string/value  \\\n",
       "index/value                            \n",
       "0             'literal'       'spam'   \n",
       "0             'literal'       'spam'   \n",
       "0             'literal'       'spam'   \n",
       "0             'literal'       'spam'   \n",
       "0             'literal'       'eggs'   \n",
       "0             'literal'       'eggs'   \n",
       "0             'literal'       'eggs'   \n",
       "0             'literal'       'eggs'   \n",
       "\n",
       "                                       integer/datatype integer/type  \\\n",
       "index/value                                                            \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'    'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'    'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'    'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'    'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'    'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'    'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'    'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#integer'    'literal'   \n",
       "\n",
       "            integer/value                            boolean/datatype  \\\n",
       "index/value                                                             \n",
       "0                     '7'  'http://www.w3.org/2001/XMLSchema#boolean'   \n",
       "0                     '7'  'http://www.w3.org/2001/XMLSchema#boolean'   \n",
       "0                    '42'  'http://www.w3.org/2001/XMLSchema#boolean'   \n",
       "0                    '42'  'http://www.w3.org/2001/XMLSchema#boolean'   \n",
       "0                     '7'  'http://www.w3.org/2001/XMLSchema#boolean'   \n",
       "0                     '7'  'http://www.w3.org/2001/XMLSchema#boolean'   \n",
       "0                    '42'  'http://www.w3.org/2001/XMLSchema#boolean'   \n",
       "0                    '42'  'http://www.w3.org/2001/XMLSchema#boolean'   \n",
       "\n",
       "            boolean/type boolean/value  ... float/value  \\\n",
       "index/value                             ...               \n",
       "0              'literal'        'true'  ...     '6.275'   \n",
       "0              'literal'       'false'  ...     '6.275'   \n",
       "0              'literal'        'true'  ...     '6.275'   \n",
       "0              'literal'       'false'  ...     '6.275'   \n",
       "0              'literal'        'true'  ...     '6.275'   \n",
       "0              'literal'       'false'  ...     '6.275'   \n",
       "0              'literal'        'true'  ...     '6.275'   \n",
       "0              'literal'       'false'  ...     '6.275'   \n",
       "\n",
       "                                       datetime/datatype datetime/type  \\\n",
       "index/value                                                              \n",
       "0            'http://www.w3.org/2001/XMLSchema#dateTime'     'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#dateTime'     'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#dateTime'     'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#dateTime'     'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#dateTime'     'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#dateTime'     'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#dateTime'     'literal'   \n",
       "0            'http://www.w3.org/2001/XMLSchema#dateTime'     'literal'   \n",
       "\n",
       "                    datetime/value                            date/datatype  \\\n",
       "index/value                                                                   \n",
       "0            '2005-04-04T04:04:04'  'http://www.w3.org/2001/XMLSchema#date'   \n",
       "0            '2005-04-04T04:04:04'  'http://www.w3.org/2001/XMLSchema#date'   \n",
       "0            '2005-04-04T04:04:04'  'http://www.w3.org/2001/XMLSchema#date'   \n",
       "0            '2005-04-04T04:04:04'  'http://www.w3.org/2001/XMLSchema#date'   \n",
       "0            '2005-04-04T04:04:04'  'http://www.w3.org/2001/XMLSchema#date'   \n",
       "0            '2005-04-04T04:04:04'  'http://www.w3.org/2001/XMLSchema#date'   \n",
       "0            '2005-04-04T04:04:04'  'http://www.w3.org/2001/XMLSchema#date'   \n",
       "0            '2005-04-04T04:04:04'  'http://www.w3.org/2001/XMLSchema#date'   \n",
       "\n",
       "             date/type    date/value                            time/datatype  \\\n",
       "index/value                                                                     \n",
       "0            'literal'  '2001-01-01'  'http://www.w3.org/2001/XMLSchema#time'   \n",
       "0            'literal'  '2001-01-01'  'http://www.w3.org/2001/XMLSchema#time'   \n",
       "0            'literal'  '2001-01-01'  'http://www.w3.org/2001/XMLSchema#time'   \n",
       "0            'literal'  '2001-01-01'  'http://www.w3.org/2001/XMLSchema#time'   \n",
       "0            'literal'  '2001-01-01'  'http://www.w3.org/2001/XMLSchema#time'   \n",
       "0            'literal'  '2001-01-01'  'http://www.w3.org/2001/XMLSchema#time'   \n",
       "0            'literal'  '2001-01-01'  'http://www.w3.org/2001/XMLSchema#time'   \n",
       "0            'literal'  '2001-01-01'  'http://www.w3.org/2001/XMLSchema#time'   \n",
       "\n",
       "             time/type time/value  \n",
       "index/value                        \n",
       "0            'literal'    '18:30'  \n",
       "0            'literal'    '18:30'  \n",
       "0            'literal'    '18:30'  \n",
       "0            'literal'    '18:30'  \n",
       "0            'literal'    '18:30'  \n",
       "0            'literal'    '18:30'  \n",
       "0            'literal'    '18:30'  \n",
       "0            'literal'    '18:30'  \n",
       "\n",
       "[8 rows x 28 columns]"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "JsonNormalizeStrategy(ENDPOINT).query_pandas(TEST_QUERY, raw=True, sep='/').set_index('index/value').pipe(pipe_info).applymap(repr)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "23939e50-6d14-4cec-abdd-689dee23bf43",
   "metadata": {},
   "source": [
    "## `read_sparql_query(sparql, *, endpoint, strategy)`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "9bbca0b3-15e8-4e56-b147-f6316c7ad169",
   "metadata": {},
   "outputs": [],
   "source": [
    "class ReadSparqlQuery:\n",
    "\n",
    "    @classmethod\n",
    "    @functools.lru_cache(maxsize=10)\n",
    "    def from_cache(cls, endpoint: str, *, strategy: str) -> ReadSparqlQuery:\n",
    "        inst = cls(endpoint, strategy=strategy)\n",
    "        logging.info('%r', inst)\n",
    "        return inst\n",
    "\n",
    "    _strategies = {'csv': CsvUrllibStrategy,\n",
    "                   'csv_sw': CsvSwStrategy,\n",
    "                   'json': JsonResultStrategy,\n",
    "                   'json_normalize': JsonNormalizeStrategy,\n",
    "                   'xml': XmlUrllibStrategy,\n",
    "                   'xml_sw': XmlSwStrategy,\n",
    "                   'xml_rdflib': GraphXmlStrategy}\n",
    "\n",
    "    def __init__(self, endpoint: str, *, strategy: str) -> None:\n",
    "        self.endpoint = endpoint\n",
    "        assert strategy in self._strategies\n",
    "        self._strategy = strategy\n",
    "\n",
    "    def __repr__(self) -> str:\n",
    "        return f'{self.__class__.__name__}({self.endpoint!r}, strategy={self._strategy!r})'\n",
    "\n",
    "    @functools.cached_property\n",
    "    def strategy(self):\n",
    "        strategy_cls = self._strategies[self._strategy]\n",
    "        strategy = strategy_cls(self.endpoint)\n",
    "        logging.info('strategy: %r', strategy)\n",
    "        return strategy\n",
    "\n",
    "    def __call__(self, sparql: str, **kwargs) -> pd.DataFrame:\n",
    "        return self.strategy.query_pandas(sparql, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "4ecf6e63-5dba-48b1-8494-47977d822e7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_sparql_query(sparql: str, *, convert_dtypes: bool = False,\n",
    "                      endpoint: str, strategy: str, **kwargs) -> pd.DataFrame:\n",
    "    reader = ReadSparqlQuery.from_cache(endpoint, strategy=strategy)\n",
    "    df = reader(sparql, **kwargs)\n",
    "    if convert_dtypes:\n",
    "        return df.convert_dtypes()\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "c40aa7a7-da7e-4a34-b85c-e742dc50baff",
   "metadata": {},
   "outputs": [],
   "source": [
    "read_wikidata = functools.partial(read_sparql_query, QUERY, convert_dtypes=True, endpoint=ENDPOINT)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5791761f-99c7-4fb0-aa61-848553864874",
   "metadata": {},
   "source": [
    "### `strategy='csv'`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "aa002249-49fe-49f7-acd9-8c2021c04139",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] ReadSparqlQuery('https://query.wikidata.org/sparql', strategy='csv')\n",
      "[INFO@root] strategy: CsvUrllibStrategy('https://query.wikidata.org/sparql')\n",
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] request: 'GET' <urllib.request.Request object at 0x000001E1EF0FB640>\n",
      "[INFO@root] response: 200 <http.client.HTTPResponse object at 0x000001E1EF0F9B10>\n",
      "[INFO@root] content-type: 'text/csv;charset=utf-8'\n",
      "[INFO@root] pandas.read_csv(<http.client.HTTPResponse object at 0x000001E1EF0F9B10>, **{'index_col': 'glottocode', 'na_values': '', 'keep_default_na': False, 'encoding': 'utf-8'})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 15.6 ms\n",
      "Wall time: 6.85 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>qid</th>\n",
       "      <th>name</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glottocode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>aant1238</th>\n",
       "      <td>Q31312216</td>\n",
       "      <td>Aantantara</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1238</th>\n",
       "      <td>Q85516014</td>\n",
       "      <td>Aari-Gayil</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1239</th>\n",
       "      <td>Q7495</td>\n",
       "      <td>Aari</td>\n",
       "      <td>Aari_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1240</th>\n",
       "      <td>Q4661732</td>\n",
       "      <td>Aariya</td>\n",
       "      <td>Aariya_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aasa1238</th>\n",
       "      <td>Q56620</td>\n",
       "      <td>Asa</td>\n",
       "      <td>Asa_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zumb1240</th>\n",
       "      <td>Q56252</td>\n",
       "      <td>Zumbun</td>\n",
       "      <td>Zumbun_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuni1245</th>\n",
       "      <td>Q10188</td>\n",
       "      <td>Zuni</td>\n",
       "      <td>Zuni_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuoj1238</th>\n",
       "      <td>Q13848149</td>\n",
       "      <td>Zuojiang Zhuang</td>\n",
       "      <td>Zuojiang_Zhuang_languages</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuri1238</th>\n",
       "      <td>Q248682</td>\n",
       "      <td>Zurich German</td>\n",
       "      <td>Z%C3%BCrich_German</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zyph1238</th>\n",
       "      <td>Q57004</td>\n",
       "      <td>Zyphe</td>\n",
       "      <td>Zyphe_language</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10873 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  qid             name                      title\n",
       "glottocode                                                       \n",
       "aant1238    Q31312216       Aantantara                       <NA>\n",
       "aari1238    Q85516014       Aari-Gayil                       <NA>\n",
       "aari1239        Q7495             Aari              Aari_language\n",
       "aari1240     Q4661732           Aariya            Aariya_language\n",
       "aasa1238       Q56620              Asa               Asa_language\n",
       "...               ...              ...                        ...\n",
       "zumb1240       Q56252           Zumbun            Zumbun_language\n",
       "zuni1245       Q10188             Zuni              Zuni_language\n",
       "zuoj1238    Q13848149  Zuojiang Zhuang  Zuojiang_Zhuang_languages\n",
       "zuri1238      Q248682    Zurich German         Z%C3%BCrich_German\n",
       "zyph1238       Q57004            Zyphe             Zyphe_language\n",
       "\n",
       "[10873 rows x 3 columns]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time read_wikidata(strategy='csv', index_col='glottocode')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6052a9c6-37a5-4331-9577-f0d1b9eaa7eb",
   "metadata": {},
   "source": [
    "### `strategy='xml'`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "eb73feff-a3cc-47e3-90c8-d1ff6a7ea564",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] ReadSparqlQuery('https://query.wikidata.org/sparql', strategy='xml')\n",
      "[INFO@root] strategy: XmlUrllibStrategy('https://query.wikidata.org/sparql')\n",
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] request: 'GET' <urllib.request.Request object at 0x000001E1EF0FB640>\n",
      "[INFO@root] response: 200 <http.client.HTTPResponse object at 0x000001E1EEB99810>\n",
      "[INFO@root] content-type: 'application/sparql-results+xml;charset=utf-8'\n",
      "[INFO@root] xml: <Element '{http://www.w3.org/2005/sparql-results#}sparql' at 0x000001E1F119F510>\n",
      "[INFO@root] pandas.DataFrame.from_records(<generator object iterrecords at 0x000001E1F03ABE60>, **{'index': 'glottocode', 'columns': ['glottocode', 'qid', 'name', 'title'], 'coerce_float': True})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 484 ms\n",
      "Wall time: 18.7 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>qid</th>\n",
       "      <th>name</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glottocode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>aant1238</th>\n",
       "      <td>Q31312216</td>\n",
       "      <td>Aantantara</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1238</th>\n",
       "      <td>Q85516014</td>\n",
       "      <td>Aari-Gayil</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1239</th>\n",
       "      <td>Q7495</td>\n",
       "      <td>Aari</td>\n",
       "      <td>Aari_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1240</th>\n",
       "      <td>Q4661732</td>\n",
       "      <td>Aariya</td>\n",
       "      <td>Aariya_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aasa1238</th>\n",
       "      <td>Q56620</td>\n",
       "      <td>Asa</td>\n",
       "      <td>Asa_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zumb1240</th>\n",
       "      <td>Q56252</td>\n",
       "      <td>Zumbun</td>\n",
       "      <td>Zumbun_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuni1245</th>\n",
       "      <td>Q10188</td>\n",
       "      <td>Zuni</td>\n",
       "      <td>Zuni_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuoj1238</th>\n",
       "      <td>Q13848149</td>\n",
       "      <td>Zuojiang Zhuang</td>\n",
       "      <td>Zuojiang_Zhuang_languages</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuri1238</th>\n",
       "      <td>Q248682</td>\n",
       "      <td>Zurich German</td>\n",
       "      <td>Z%C3%BCrich_German</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zyph1238</th>\n",
       "      <td>Q57004</td>\n",
       "      <td>Zyphe</td>\n",
       "      <td>Zyphe_language</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10873 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  qid             name                      title\n",
       "glottocode                                                       \n",
       "aant1238    Q31312216       Aantantara                       <NA>\n",
       "aari1238    Q85516014       Aari-Gayil                       <NA>\n",
       "aari1239        Q7495             Aari              Aari_language\n",
       "aari1240     Q4661732           Aariya            Aariya_language\n",
       "aasa1238       Q56620              Asa               Asa_language\n",
       "...               ...              ...                        ...\n",
       "zumb1240       Q56252           Zumbun            Zumbun_language\n",
       "zuni1245       Q10188             Zuni              Zuni_language\n",
       "zuoj1238    Q13848149  Zuojiang Zhuang  Zuojiang_Zhuang_languages\n",
       "zuri1238      Q248682    Zurich German         Z%C3%BCrich_German\n",
       "zyph1238       Q57004            Zyphe             Zyphe_language\n",
       "\n",
       "[10873 rows x 3 columns]"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time read_wikidata(strategy='xml', index='glottocode')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "426f9ea7-031c-47fb-89e0-04c17d6ed924",
   "metadata": {},
   "source": [
    "### `strategy='xml_rdflib`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "c7edcb9e-9a58-4166-acad-e7bf0f7884cb",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] ReadSparqlQuery('https://query.wikidata.org/sparql', strategy='xml_rdflib')\n",
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] graph: [a rdflib:ConjunctiveGraph;rdflib:storage [a rdflib:Store;rdfs:label 'SPARQLStore']]\n",
      "[INFO@root] prefixes: {'schema': Namespace(\"http://schema.org/\")}\n",
      "[INFO@root] strategy: <GraphXmlStrategy 'https://query.wikidata.org/sparql' graph=<Graph identifier=Ncb3cf8bf5f4b429fb9d63d2a1a090667 (<class 'rdflib.graph.ConjunctiveGraph'>)>>\n",
      "[INFO@root] result: <rdflib.plugins.sparql.results.xmlresults.XMLResult object at 0x000001E1EF0F9C90>\n",
      "[INFO@root] pandas.DataFrame.from_records(<generator object read_sparql_query_graph.<locals>.<genexpr> at 0x000001E1F3A80270>, **{'index': 'glottocode', 'columns': ['glottocode', 'qid', 'name', 'title'], 'coerce_float': True})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 438 ms\n",
      "Wall time: 14 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>qid</th>\n",
       "      <th>name</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glottocode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>aant1238</th>\n",
       "      <td>Q31312216</td>\n",
       "      <td>Aantantara</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1238</th>\n",
       "      <td>Q85516014</td>\n",
       "      <td>Aari-Gayil</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1239</th>\n",
       "      <td>Q7495</td>\n",
       "      <td>Aari</td>\n",
       "      <td>Aari_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1240</th>\n",
       "      <td>Q4661732</td>\n",
       "      <td>Aariya</td>\n",
       "      <td>Aariya_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aasa1238</th>\n",
       "      <td>Q56620</td>\n",
       "      <td>Asa</td>\n",
       "      <td>Asa_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zumb1240</th>\n",
       "      <td>Q56252</td>\n",
       "      <td>Zumbun</td>\n",
       "      <td>Zumbun_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuni1245</th>\n",
       "      <td>Q10188</td>\n",
       "      <td>Zuni</td>\n",
       "      <td>Zuni_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuoj1238</th>\n",
       "      <td>Q13848149</td>\n",
       "      <td>Zuojiang Zhuang</td>\n",
       "      <td>Zuojiang_Zhuang_languages</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuri1238</th>\n",
       "      <td>Q248682</td>\n",
       "      <td>Zurich German</td>\n",
       "      <td>Z%C3%BCrich_German</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zyph1238</th>\n",
       "      <td>Q57004</td>\n",
       "      <td>Zyphe</td>\n",
       "      <td>Zyphe_language</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10873 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  qid             name                      title\n",
       "glottocode                                                       \n",
       "aant1238    Q31312216       Aantantara                       <NA>\n",
       "aari1238    Q85516014       Aari-Gayil                       <NA>\n",
       "aari1239        Q7495             Aari              Aari_language\n",
       "aari1240     Q4661732           Aariya            Aariya_language\n",
       "aasa1238       Q56620              Asa               Asa_language\n",
       "...               ...              ...                        ...\n",
       "zumb1240       Q56252           Zumbun            Zumbun_language\n",
       "zuni1245       Q10188             Zuni              Zuni_language\n",
       "zuoj1238    Q13848149  Zuojiang Zhuang  Zuojiang_Zhuang_languages\n",
       "zuri1238      Q248682    Zurich German         Z%C3%BCrich_German\n",
       "zyph1238       Q57004            Zyphe             Zyphe_language\n",
       "\n",
       "[10873 rows x 3 columns]"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time read_wikidata(strategy='xml_rdflib', index='glottocode')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "235dc1b7-0e6c-4703-b219-f720bf1e9979",
   "metadata": {},
   "source": [
    "### `strategy='csv_sw'`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "071cc12d-1dec-490e-b192-c7ce7c8ca274",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] ReadSparqlQuery('https://query.wikidata.org/sparql', strategy='csv_sw')\n",
      "[INFO@root] strategy: CsvSwStrategy('https://query.wikidata.org/sparql', returnFormat='csv')\n",
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] returnFormat: 'csv'\n",
      "[INFO@root] CsvSwStrategy('https://query.wikidata.org/sparql', returnFormat='csv').setOnlyConnreg(True)\n",
      "[INFO@root] result: <SPARQLWrapper.Wrapper.QueryResult object at 0x000001E1EEC008E0>\n",
      "[INFO@root] content_type: 'text/csv;charset=utf-8'\n",
      "[INFO@root] pandas.read_csv(<http.client.HTTPResponse object at 0x000001E1EEC01D50>, **{'index_col': 'glottocode', 'na_values': '', 'keep_default_na': False, 'encoding': 'utf-8'})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 15.6 ms\n",
      "Wall time: 7.7 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>qid</th>\n",
       "      <th>name</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glottocode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>aant1238</th>\n",
       "      <td>Q31312216</td>\n",
       "      <td>Aantantara</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1238</th>\n",
       "      <td>Q85516014</td>\n",
       "      <td>Aari-Gayil</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1239</th>\n",
       "      <td>Q7495</td>\n",
       "      <td>Aari</td>\n",
       "      <td>Aari_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1240</th>\n",
       "      <td>Q4661732</td>\n",
       "      <td>Aariya</td>\n",
       "      <td>Aariya_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aasa1238</th>\n",
       "      <td>Q56620</td>\n",
       "      <td>Asa</td>\n",
       "      <td>Asa_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zumb1240</th>\n",
       "      <td>Q56252</td>\n",
       "      <td>Zumbun</td>\n",
       "      <td>Zumbun_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuni1245</th>\n",
       "      <td>Q10188</td>\n",
       "      <td>Zuni</td>\n",
       "      <td>Zuni_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuoj1238</th>\n",
       "      <td>Q13848149</td>\n",
       "      <td>Zuojiang Zhuang</td>\n",
       "      <td>Zuojiang_Zhuang_languages</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuri1238</th>\n",
       "      <td>Q248682</td>\n",
       "      <td>Zurich German</td>\n",
       "      <td>Z%C3%BCrich_German</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zyph1238</th>\n",
       "      <td>Q57004</td>\n",
       "      <td>Zyphe</td>\n",
       "      <td>Zyphe_language</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10873 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  qid             name                      title\n",
       "glottocode                                                       \n",
       "aant1238    Q31312216       Aantantara                       <NA>\n",
       "aari1238    Q85516014       Aari-Gayil                       <NA>\n",
       "aari1239        Q7495             Aari              Aari_language\n",
       "aari1240     Q4661732           Aariya            Aariya_language\n",
       "aasa1238       Q56620              Asa               Asa_language\n",
       "...               ...              ...                        ...\n",
       "zumb1240       Q56252           Zumbun            Zumbun_language\n",
       "zuni1245       Q10188             Zuni              Zuni_language\n",
       "zuoj1238    Q13848149  Zuojiang Zhuang  Zuojiang_Zhuang_languages\n",
       "zuri1238      Q248682    Zurich German         Z%C3%BCrich_German\n",
       "zyph1238       Q57004            Zyphe             Zyphe_language\n",
       "\n",
       "[10873 rows x 3 columns]"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time read_wikidata(strategy='csv_sw', index_col='glottocode')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5eb79e1-5a40-4d66-9951-056fbf3e440d",
   "metadata": {},
   "source": [
    "### `strategy='xml_sw'`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "6d36993c-766f-431c-af72-b341afbd4689",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] ReadSparqlQuery('https://query.wikidata.org/sparql', strategy='xml_sw')\n",
      "[INFO@root] strategy: XmlSwStrategy('https://query.wikidata.org/sparql', returnFormat='xml')\n",
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] returnFormat: 'xml'\n",
      "[INFO@root] result: <SPARQLWrapper.Wrapper.QueryResult object at 0x000001E1EEB99E40>\n",
      "[INFO@root] content_type: 'application/sparql-results+xml;charset=utf-8'\n",
      "[INFO@root] xml: <Element '{http://www.w3.org/2005/sparql-results#}sparql' at 0x000001E1F3318540>\n",
      "[INFO@root] pandas.DataFrame.from_records(<generator object iterrecords at 0x000001E1EF2D09E0>, **{'index': 'glottocode', 'columns': ['glottocode', 'qid', 'name', 'title'], 'coerce_float': True})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 625 ms\n",
      "Wall time: 8.97 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>qid</th>\n",
       "      <th>name</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glottocode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>aant1238</th>\n",
       "      <td>Q31312216</td>\n",
       "      <td>Aantantara</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1238</th>\n",
       "      <td>Q85516014</td>\n",
       "      <td>Aari-Gayil</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1239</th>\n",
       "      <td>Q7495</td>\n",
       "      <td>Aari</td>\n",
       "      <td>Aari_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1240</th>\n",
       "      <td>Q4661732</td>\n",
       "      <td>Aariya</td>\n",
       "      <td>Aariya_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aasa1238</th>\n",
       "      <td>Q56620</td>\n",
       "      <td>Asa</td>\n",
       "      <td>Asa_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zumb1240</th>\n",
       "      <td>Q56252</td>\n",
       "      <td>Zumbun</td>\n",
       "      <td>Zumbun_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuni1245</th>\n",
       "      <td>Q10188</td>\n",
       "      <td>Zuni</td>\n",
       "      <td>Zuni_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuoj1238</th>\n",
       "      <td>Q13848149</td>\n",
       "      <td>Zuojiang Zhuang</td>\n",
       "      <td>Zuojiang_Zhuang_languages</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuri1238</th>\n",
       "      <td>Q248682</td>\n",
       "      <td>Zurich German</td>\n",
       "      <td>Z%C3%BCrich_German</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zyph1238</th>\n",
       "      <td>Q57004</td>\n",
       "      <td>Zyphe</td>\n",
       "      <td>Zyphe_language</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10873 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  qid             name                      title\n",
       "glottocode                                                       \n",
       "aant1238    Q31312216       Aantantara                       <NA>\n",
       "aari1238    Q85516014       Aari-Gayil                       <NA>\n",
       "aari1239        Q7495             Aari              Aari_language\n",
       "aari1240     Q4661732           Aariya            Aariya_language\n",
       "aasa1238       Q56620              Asa               Asa_language\n",
       "...               ...              ...                        ...\n",
       "zumb1240       Q56252           Zumbun            Zumbun_language\n",
       "zuni1245       Q10188             Zuni              Zuni_language\n",
       "zuoj1238    Q13848149  Zuojiang Zhuang  Zuojiang_Zhuang_languages\n",
       "zuri1238      Q248682    Zurich German         Z%C3%BCrich_German\n",
       "zyph1238       Q57004            Zyphe             Zyphe_language\n",
       "\n",
       "[10873 rows x 3 columns]"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time read_wikidata(strategy='xml_sw', index='glottocode')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56b5fcd4-15d4-49f1-830d-3aabf15c80ca",
   "metadata": {},
   "source": [
    "### `strategy='json'`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "4bcf9fc1-2bd3-44ac-bc7d-771484626866",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] ReadSparqlQuery('https://query.wikidata.org/sparql', strategy='json')\n",
      "[INFO@root] strategy: JsonResultStrategy('https://query.wikidata.org/sparql', returnFormat='json')\n",
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] result: <SPARQLWrapper.SmartWrapper.Bindings object at 0x000001E1EEB9B1F0>\n",
      "[INFO@root] result.variables: ['glottocode', 'qid', 'name', 'title']\n",
      "[INFO@root] pandas.DataFrame.from_records(<generator object read_sparql_query_json_result.<locals>.<genexpr> at 0x000001E1EF26C190>, **{'index': 'glottocode', 'columns': ['glottocode', 'qid', 'name', 'title'], 'coerce_float': True})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 125 ms\n",
      "Wall time: 5.3 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>qid</th>\n",
       "      <th>name</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glottocode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>aant1238</th>\n",
       "      <td>Q31312216</td>\n",
       "      <td>Aantantara</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1238</th>\n",
       "      <td>Q85516014</td>\n",
       "      <td>Aari-Gayil</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1239</th>\n",
       "      <td>Q7495</td>\n",
       "      <td>Aari</td>\n",
       "      <td>Aari_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1240</th>\n",
       "      <td>Q4661732</td>\n",
       "      <td>Aariya</td>\n",
       "      <td>Aariya_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aasa1238</th>\n",
       "      <td>Q56620</td>\n",
       "      <td>Asa</td>\n",
       "      <td>Asa_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zumb1240</th>\n",
       "      <td>Q56252</td>\n",
       "      <td>Zumbun</td>\n",
       "      <td>Zumbun_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuni1245</th>\n",
       "      <td>Q10188</td>\n",
       "      <td>Zuni</td>\n",
       "      <td>Zuni_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuoj1238</th>\n",
       "      <td>Q13848149</td>\n",
       "      <td>Zuojiang Zhuang</td>\n",
       "      <td>Zuojiang_Zhuang_languages</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuri1238</th>\n",
       "      <td>Q248682</td>\n",
       "      <td>Zurich German</td>\n",
       "      <td>Z%C3%BCrich_German</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zyph1238</th>\n",
       "      <td>Q57004</td>\n",
       "      <td>Zyphe</td>\n",
       "      <td>Zyphe_language</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10873 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  qid             name                      title\n",
       "glottocode                                                       \n",
       "aant1238    Q31312216       Aantantara                       <NA>\n",
       "aari1238    Q85516014       Aari-Gayil                       <NA>\n",
       "aari1239        Q7495             Aari              Aari_language\n",
       "aari1240     Q4661732           Aariya            Aariya_language\n",
       "aasa1238       Q56620              Asa               Asa_language\n",
       "...               ...              ...                        ...\n",
       "zumb1240       Q56252           Zumbun            Zumbun_language\n",
       "zuni1245       Q10188             Zuni              Zuni_language\n",
       "zuoj1238    Q13848149  Zuojiang Zhuang  Zuojiang_Zhuang_languages\n",
       "zuri1238      Q248682    Zurich German         Z%C3%BCrich_German\n",
       "zyph1238       Q57004            Zyphe             Zyphe_language\n",
       "\n",
       "[10873 rows x 3 columns]"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time read_wikidata(strategy='json', index='glottocode')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3aa86137-540b-49c0-bdbd-c5d98d269b24",
   "metadata": {},
   "source": [
    "### `strategy='json_normalize'`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "5642ca34-b0a3-49a9-a715-b3d2262c05cc",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO@root] ReadSparqlQuery('https://query.wikidata.org/sparql', strategy='json_normalize')\n",
      "[INFO@root] strategy: JsonNormalizeStrategy('https://query.wikidata.org/sparql', returnFormat='json')\n",
      "[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n",
      "[INFO@root] returnFormat: 'json'\n",
      "[INFO@root] result: <SPARQLWrapper.Wrapper.QueryResult object at 0x000001E1EEB9B280>\n",
      "[INFO@root] content_type: 'application/sparql-results+json;charset=utf-8'\n",
      "[INFO@root] result['head']: {'vars': ['glottocode', 'qid', 'name', 'title']}\n",
      "[INFO@root] pandas.json_normalize(jsondata, **{'record_path': ['results', 'bindings']})\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 391 ms\n",
      "Wall time: 4.53 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>qid</th>\n",
       "      <th>name</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glottocode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>aant1238</th>\n",
       "      <td>Q31312216</td>\n",
       "      <td>Aantantara</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1238</th>\n",
       "      <td>Q85516014</td>\n",
       "      <td>Aari-Gayil</td>\n",
       "      <td>&lt;NA&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1239</th>\n",
       "      <td>Q7495</td>\n",
       "      <td>Aari</td>\n",
       "      <td>Aari_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aari1240</th>\n",
       "      <td>Q4661732</td>\n",
       "      <td>Aariya</td>\n",
       "      <td>Aariya_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aasa1238</th>\n",
       "      <td>Q56620</td>\n",
       "      <td>Asa</td>\n",
       "      <td>Asa_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zumb1240</th>\n",
       "      <td>Q56252</td>\n",
       "      <td>Zumbun</td>\n",
       "      <td>Zumbun_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuni1245</th>\n",
       "      <td>Q10188</td>\n",
       "      <td>Zuni</td>\n",
       "      <td>Zuni_language</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuoj1238</th>\n",
       "      <td>Q13848149</td>\n",
       "      <td>Zuojiang Zhuang</td>\n",
       "      <td>Zuojiang_Zhuang_languages</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zuri1238</th>\n",
       "      <td>Q248682</td>\n",
       "      <td>Zurich German</td>\n",
       "      <td>Z%C3%BCrich_German</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zyph1238</th>\n",
       "      <td>Q57004</td>\n",
       "      <td>Zyphe</td>\n",
       "      <td>Zyphe_language</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10873 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  qid             name                      title\n",
       "glottocode                                                       \n",
       "aant1238    Q31312216       Aantantara                       <NA>\n",
       "aari1238    Q85516014       Aari-Gayil                       <NA>\n",
       "aari1239        Q7495             Aari              Aari_language\n",
       "aari1240     Q4661732           Aariya            Aariya_language\n",
       "aasa1238       Q56620              Asa               Asa_language\n",
       "...               ...              ...                        ...\n",
       "zumb1240       Q56252           Zumbun            Zumbun_language\n",
       "zuni1245       Q10188             Zuni              Zuni_language\n",
       "zuoj1238    Q13848149  Zuojiang Zhuang  Zuojiang_Zhuang_languages\n",
       "zuri1238      Q248682    Zurich German         Z%C3%BCrich_German\n",
       "zyph1238       Q57004            Zyphe             Zyphe_language\n",
       "\n",
       "[10873 rows x 3 columns]"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time read_wikidata(strategy='json_normalize').set_index('glottocode')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}