Forked from gustavofonseca/Extração de dados dos documentos de 2017-2020.ipynb
Created
November 17, 2021 14:43
-
-
Save rafaelpezzuto/dbc0aa2dbe31161b0cf1e4b605c131ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import json\n", | |
"from lxml import etree\n", | |
"import pandas as pd\n", | |
"import concurrent.futures" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Root directory of the local XML dump. Set the XMLS_BASEDIR environment\n", | |
"# variable to point elsewhere; the default is ~/.scielo-kerneldump of the\n", | |
"# user running the notebook, so no machine-specific path is baked in.\n", | |
"XMLS_BASEDIR = os.environ.get(\n", | |
"    \"XMLS_BASEDIR\",\n", | |
"    os.path.join(os.path.expanduser(\"~\"), \".scielo-kerneldump\"),\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def iter_files(base_dir):\n", | |
"    \"\"\"Yield the path of every file found under ``base_dir``, recursively.\n", | |
"\n", | |
"    ``os.walk`` gives no ordering guarantee, so directories and file names\n", | |
"    are visited in sorted order to make repeated runs produce the paths in\n", | |
"    the same sequence.\n", | |
"\n", | |
"    Args:\n", | |
"        base_dir: directory where the walk starts.\n", | |
"\n", | |
"    Yields:\n", | |
"        ``os.path.join(root, name)`` for each file name reported by ``os.walk``.\n", | |
"    \"\"\"\n", | |
"    for root, dirs, files in os.walk(base_dir):\n", | |
"        # Sorting in place makes the top-down walk descend into the\n", | |
"        # subdirectories in a stable order.\n", | |
"        dirs.sort()\n", | |
"        for name in sorted(files):\n", | |
"            yield os.path.join(root, name)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def _toint(value, default=-1):\n", | |
"    \"\"\"Convert ``value`` with ``int()``, falling back to ``default``.\n", | |
"\n", | |
"    Args:\n", | |
"        value: anything ``int()`` may accept.\n", | |
"        default: returned when ``int(value)`` raises ``TypeError`` (e.g. ``None``),\n", | |
"            ``ValueError`` (e.g. non-numeric text) or ``OverflowError`` (e.g. infinity).\n", | |
"\n", | |
"    Returns:\n", | |
"        The converted integer, or ``default`` when the conversion fails.\n", | |
"    \"\"\"\n", | |
"    try:\n", | |
"        return int(value)\n", | |
"    except (TypeError, ValueError, OverflowError):\n", | |
"        # Only conversion failures are absorbed; KeyboardInterrupt, SystemExit\n", | |
"        # and unrelated errors keep propagating.\n", | |
"        return default" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 1.49 s, sys: 3.3 s, total: 4.79 s\n", | |
"Wall time: 6.1 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"83186" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# Materialize the walk once so the paths can be counted here and reused\n", | |
"# by the cells below.\n", | |
"xml_files = list(iter_files(base_dir=XMLS_BASEDIR))\n", | |
"len(xml_files)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def get_first_text(xml, expr):\n", | |
"    \"\"\"Return the stripped text of the first result of ``xml.xpath(expr)``.\n", | |
"\n", | |
"    ``None`` is returned when the expression matches nothing. A result that\n", | |
"    exposes a ``text`` attribute contributes that attribute; any other\n", | |
"    result is used as-is. Values without a ``strip`` method (such as\n", | |
"    ``None``) are returned untouched.\n", | |
"    \"\"\"\n", | |
"    try:\n", | |
"        head = xml.xpath(expr)[0]\n", | |
"    except IndexError:\n", | |
"        return None\n", | |
"\n", | |
"    value = getattr(head, \"text\", head)\n", | |
"    try:\n", | |
"        return value.strip()\n", | |
"    except AttributeError:\n", | |
"        return value\n", | |
"\n", | |
"\n", | |
"def describe(xml_filename):\n", | |
"    \"\"\"Parse one XML file and summarize its tables and figures.\n", | |
"\n", | |
"    Returns a flat dict with the file name, article type, DOI, publication\n", | |
"    month and year, and the number of ``table-wrap`` and ``fig`` elements\n", | |
"    (plus their labels and caption titles) found in the main ``body`` and in\n", | |
"    ``translation`` sub-articles. ``month`` falls back to 0 and ``year`` to\n", | |
"    the ``_toint`` default when they cannot be converted to ``int``.\n", | |
"    \"\"\"\n", | |
"    xml = etree.parse(xml_filename)\n", | |
"\n", | |
"    def count(expr):\n", | |
"        # Number of nodes matched by the XPath expression.\n", | |
"        return len(xml.xpath(expr))\n", | |
"\n", | |
"    # Shared XPath prefixes; each lookup below appends its own tail.\n", | |
"    meta = \"/article/front/article-meta\"\n", | |
"    pub_date = meta + \"/pub-date[@pub-type = 'epub' or @pub-type = 'epub-ppub' or @date-type = 'pub']\"\n", | |
"    body = \"/article/body\"\n", | |
"    translation = \"/article/sub-article[@article-type = 'translation']\"\n", | |
"    translation_body = translation + \"/body\"\n", | |
"\n", | |
"    return {\n", | |
"        \"total_docs\": 1,\n", | |
"        \"filename\": xml_filename,\n", | |
"        \"article_type\": get_first_text(xml, \"/article/@article-type\"),\n", | |
"        \"doi\": get_first_text(xml, meta + '/article-id[@pub-id-type=\"doi\"]'),\n", | |
"        \"month\": _toint(get_first_text(xml, pub_date + \"/month\"), default=0),\n", | |
"        \"year\": _toint(get_first_text(xml, pub_date + \"/year\")),\n", | |
"        \"table_wrap_count\": count(body + \"//table-wrap\"),\n", | |
"        \"table_wrap_table_count\": count(body + \"//table-wrap/table\"),\n", | |
"        \"table_wrap_graphic_count\": count(body + \"//table-wrap/graphic\"),\n", | |
"        \"table_wrap_label_count\": count(body + \"//table-wrap/label\"),\n", | |
"        \"table_wrap_caption_title_count\": count(body + \"//table-wrap/caption/title\"),\n", | |
"        \"fig_count\": count(body + \"//fig\"),\n", | |
"        \"fig_label_count\": count(body + \"//fig/label\"),\n", | |
"        \"fig_caption_title_count\": count(body + \"//fig/caption/title\"),\n", | |
"        \"has_subarticle_translation\": bool(xml.xpath(translation)),\n", | |
"        \"subarticle_translation_table_wrap_count\": count(translation_body + \"//table-wrap\"),\n", | |
"        \"subarticle_translation_table_wrap_label_count\": count(translation_body + \"//table-wrap/label\"),\n", | |
"        \"subarticle_translation_table_wrap_caption_title_count\": count(translation_body + \"//table-wrap/caption/title\"),\n", | |
"        \"subarticle_translation_fig_count\": count(translation_body + \"//fig\"),\n", | |
"        \"subarticle_translation_fig_label_count\": count(translation_body + \"//fig/label\"),\n", | |
"        \"subarticle_translation_fig_caption_title_count\": count(translation_body + \"//fig/caption/title\"),\n", | |
"    }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'article_type': 'research-article',\n", | |
" 'doi': '10.21577/0103-5053.20160329',\n", | |
" 'fig_caption_title_count': 4,\n", | |
" 'fig_count': 4,\n", | |
" 'fig_label_count': 4,\n", | |
" 'filename': '/Users/gustavofonseca/.scielo-kerneldump/kernel/2017/Yk6g/Yk6g3tKzqScFC8BbD8jmXbR.xml',\n", | |
" 'has_subarticle_translation': False,\n", | |
" 'month': 9,\n", | |
" 'subarticle_translation_fig_caption_title_count': 0,\n", | |
" 'subarticle_translation_fig_count': 0,\n", | |
" 'subarticle_translation_fig_label_count': 0,\n", | |
" 'subarticle_translation_table_wrap_caption_title_count': 0,\n", | |
" 'subarticle_translation_table_wrap_count': 0,\n", | |
" 'subarticle_translation_table_wrap_label_count': 0,\n", | |
" 'table_wrap_caption_title_count': 2,\n", | |
" 'table_wrap_count': 2,\n", | |
" 'table_wrap_graphic_count': 0,\n", | |
" 'table_wrap_label_count': 2,\n", | |
" 'table_wrap_table_count': 2,\n", | |
" 'total_docs': 1,\n", | |
" 'year': 2017}\n" | |
] | |
} | |
], | |
"source": [ | |
"from pprint import pprint as pp\n", | |
"\n", | |
"# Smoke-test `describe` on a single document before the full extraction.\n", | |
"pp(\n", | |
"    describe(xml_files[0])\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 11min 16s, sys: 1min 19s, total: 12min 36s\n", | |
"Wall time: 4min 3s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# Describe every file on a thread pool and store one JSON document per line.\n", | |
"# `executor.map` yields results in the order of `xml_files`, so line N of the\n", | |
"# output corresponds to xml_files[N] without pairing them up explicitly.\n", | |
"with concurrent.futures.ThreadPoolExecutor() as executor:\n", | |
"    with open(\"descriptions2.jsonl\", \"w\", encoding=\"utf-8\") as output:\n", | |
"        for description in executor.map(describe, xml_files):\n", | |
"            output.write(json.dumps(description) + \"\\n\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"83186" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Load the JSON Lines file back: one description dict per line.\n", | |
"# The handle is deliberately not called `input`, which would rebind the\n", | |
"# builtin of the same name for the rest of the kernel session.\n", | |
"with open(\"descriptions2.jsonl\", encoding=\"utf-8\") as jsonl_file:\n", | |
"    descriptions = [json.loads(line) for line in jsonl_file]\n", | |
"len(descriptions)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(82723, 21)" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Keep only the documents whose publication year is 2017 or later, then\n", | |
"# load them into a DataFrame (one row per document).\n", | |
"descriptions_gte_2017 = [\n", | |
"    record\n", | |
"    for record in descriptions\n", | |
"    if _toint(record[\"year\"]) >= 2017\n", | |
"]\n", | |
"df = pd.DataFrame.from_records(descriptions_gte_2017)\n", | |
"df.shape" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"O que queremos saber:\n", | |
"1. Qual o percentual de tabelas codificadas em HTML?\n", | |
"2. Qual o percentual de tabelas que possuem *label*?\n", | |
"3. Qual o percentual de tabelas que possuem *caption*?\n", | |
"4. Qual o percentual de figuras que possuem *label*?\n", | |
"5. Qual o percentual de figuras que possuem *caption*?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>total_docs</th>\n", | |
" <th>table_wrap_count</th>\n", | |
" <th>table_wrap_table_count</th>\n", | |
" <th>table_wrap_graphic_count</th>\n", | |
" <th>table_wrap_label_count</th>\n", | |
" <th>table_wrap_caption_title_count</th>\n", | |
" <th>fig_count</th>\n", | |
" <th>fig_label_count</th>\n", | |
" <th>fig_caption_title_count</th>\n", | |
" <th>has_subarticle_translation</th>\n", | |
" <th>...</th>\n", | |
" <th>subarticle_translation_table_wrap_label_count</th>\n", | |
" <th>subarticle_translation_table_wrap_caption_title_count</th>\n", | |
" <th>subarticle_translation_fig_count</th>\n", | |
" <th>subarticle_translation_fig_label_count</th>\n", | |
" <th>subarticle_translation_fig_caption_title_count</th>\n", | |
" <th>percentage_html_tables</th>\n", | |
" <th>percentage_tables_with_labels</th>\n", | |
" <th>percentage_tables_with_captions_titles</th>\n", | |
" <th>percentage_fig_with_labels</th>\n", | |
" <th>percentage_fig_with_captions_titles</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>year</th>\n", | |
" <th>month</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"13\" valign=\"top\">2017</th>\n", | |
" <th>0</th>\n", | |
" <td>10937</td>\n", | |
" <td>21465</td>\n", | |
" <td>18827</td>\n", | |
" <td>2009</td>\n", | |
" <td>20818</td>\n", | |
" <td>20936</td>\n", | |
" <td>24384</td>\n", | |
" <td>23942</td>\n", | |
" <td>23923</td>\n", | |
" <td>2605.0</td>\n", | |
" <td>...</td>\n", | |
" <td>4884</td>\n", | |
" <td>4945</td>\n", | |
" <td>3705</td>\n", | |
" <td>3669</td>\n", | |
" <td>3672</td>\n", | |
" <td>87.7102</td>\n", | |
" <td>96.9858</td>\n", | |
" <td>97.5355</td>\n", | |
" <td>98.187336</td>\n", | |
" <td>98.109416</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>539</td>\n", | |
" <td>1135</td>\n", | |
" <td>978</td>\n", | |
" <td>10</td>\n", | |
" <td>1103</td>\n", | |
" <td>1103</td>\n", | |
" <td>1507</td>\n", | |
" <td>1495</td>\n", | |
" <td>1493</td>\n", | |
" <td>111.0</td>\n", | |
" <td>...</td>\n", | |
" <td>209</td>\n", | |
" <td>209</td>\n", | |
" <td>131</td>\n", | |
" <td>129</td>\n", | |
" <td>129</td>\n", | |
" <td>86.1674</td>\n", | |
" <td>97.1806</td>\n", | |
" <td>97.1806</td>\n", | |
" <td>99.203716</td>\n", | |
" <td>99.071002</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>586</td>\n", | |
" <td>1440</td>\n", | |
" <td>1227</td>\n", | |
" <td>164</td>\n", | |
" <td>1415</td>\n", | |
" <td>1414</td>\n", | |
" <td>1564</td>\n", | |
" <td>1547</td>\n", | |
" <td>1550</td>\n", | |
" <td>107.0</td>\n", | |
" <td>...</td>\n", | |
" <td>277</td>\n", | |
" <td>277</td>\n", | |
" <td>225</td>\n", | |
" <td>220</td>\n", | |
" <td>220</td>\n", | |
" <td>85.2083</td>\n", | |
" <td>98.2639</td>\n", | |
" <td>98.1944</td>\n", | |
" <td>98.913043</td>\n", | |
" <td>99.104859</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1083</td>\n", | |
" <td>2569</td>\n", | |
" <td>2026</td>\n", | |
" <td>405</td>\n", | |
" <td>2549</td>\n", | |
" <td>2548</td>\n", | |
" <td>3106</td>\n", | |
" <td>3098</td>\n", | |
" <td>3102</td>\n", | |
" <td>196.0</td>\n", | |
" <td>...</td>\n", | |
" <td>489</td>\n", | |
" <td>489</td>\n", | |
" <td>280</td>\n", | |
" <td>280</td>\n", | |
" <td>280</td>\n", | |
" <td>78.8634</td>\n", | |
" <td>99.2215</td>\n", | |
" <td>99.1826</td>\n", | |
" <td>99.742434</td>\n", | |
" <td>99.871217</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1043</td>\n", | |
" <td>2219</td>\n", | |
" <td>1773</td>\n", | |
" <td>361</td>\n", | |
" <td>2211</td>\n", | |
" <td>2211</td>\n", | |
" <td>2502</td>\n", | |
" <td>2456</td>\n", | |
" <td>2464</td>\n", | |
" <td>178.0</td>\n", | |
" <td>...</td>\n", | |
" <td>441</td>\n", | |
" <td>441</td>\n", | |
" <td>367</td>\n", | |
" <td>367</td>\n", | |
" <td>367</td>\n", | |
" <td>79.9009</td>\n", | |
" <td>99.6395</td>\n", | |
" <td>99.6395</td>\n", | |
" <td>98.161471</td>\n", | |
" <td>98.481215</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>960</td>\n", | |
" <td>2156</td>\n", | |
" <td>1759</td>\n", | |
" <td>310</td>\n", | |
" <td>2150</td>\n", | |
" <td>2145</td>\n", | |
" <td>2418</td>\n", | |
" <td>2400</td>\n", | |
" <td>2401</td>\n", | |
" <td>140.0</td>\n", | |
" <td>...</td>\n", | |
" <td>360</td>\n", | |
" <td>360</td>\n", | |
" <td>184</td>\n", | |
" <td>184</td>\n", | |
" <td>182</td>\n", | |
" <td>81.5863</td>\n", | |
" <td>99.7217</td>\n", | |
" <td>99.4898</td>\n", | |
" <td>99.255583</td>\n", | |
" <td>99.296940</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>1144</td>\n", | |
" <td>2741</td>\n", | |
" <td>2253</td>\n", | |
" <td>468</td>\n", | |
" <td>2709</td>\n", | |
" <td>2712</td>\n", | |
" <td>3560</td>\n", | |
" <td>3540</td>\n", | |
" <td>3515</td>\n", | |
" <td>240.0</td>\n", | |
" <td>...</td>\n", | |
" <td>562</td>\n", | |
" <td>565</td>\n", | |
" <td>340</td>\n", | |
" <td>338</td>\n", | |
" <td>337</td>\n", | |
" <td>82.1963</td>\n", | |
" <td>98.8325</td>\n", | |
" <td>98.942</td>\n", | |
" <td>99.438202</td>\n", | |
" <td>98.735955</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>930</td>\n", | |
" <td>2067</td>\n", | |
" <td>1641</td>\n", | |
" <td>351</td>\n", | |
" <td>2064</td>\n", | |
" <td>2064</td>\n", | |
" <td>2434</td>\n", | |
" <td>2420</td>\n", | |
" <td>2421</td>\n", | |
" <td>199.0</td>\n", | |
" <td>...</td>\n", | |
" <td>389</td>\n", | |
" <td>388</td>\n", | |
" <td>206</td>\n", | |
" <td>204</td>\n", | |
" <td>205</td>\n", | |
" <td>79.3904</td>\n", | |
" <td>99.8549</td>\n", | |
" <td>99.8549</td>\n", | |
" <td>99.424815</td>\n", | |
" <td>99.465900</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1187</td>\n", | |
" <td>2573</td>\n", | |
" <td>2186</td>\n", | |
" <td>266</td>\n", | |
" <td>2536</td>\n", | |
" <td>2536</td>\n", | |
" <td>3127</td>\n", | |
" <td>3110</td>\n", | |
" <td>3108</td>\n", | |
" <td>178.0</td>\n", | |
" <td>...</td>\n", | |
" <td>343</td>\n", | |
" <td>342</td>\n", | |
" <td>265</td>\n", | |
" <td>265</td>\n", | |
" <td>265</td>\n", | |
" <td>84.9592</td>\n", | |
" <td>98.562</td>\n", | |
" <td>98.562</td>\n", | |
" <td>99.456348</td>\n", | |
" <td>99.392389</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>827</td>\n", | |
" <td>1929</td>\n", | |
" <td>1644</td>\n", | |
" <td>250</td>\n", | |
" <td>1928</td>\n", | |
" <td>1928</td>\n", | |
" <td>2548</td>\n", | |
" <td>2523</td>\n", | |
" <td>2520</td>\n", | |
" <td>209.0</td>\n", | |
" <td>...</td>\n", | |
" <td>486</td>\n", | |
" <td>486</td>\n", | |
" <td>351</td>\n", | |
" <td>346</td>\n", | |
" <td>346</td>\n", | |
" <td>85.2255</td>\n", | |
" <td>99.9482</td>\n", | |
" <td>99.9482</td>\n", | |
" <td>99.018838</td>\n", | |
" <td>98.901099</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>892</td>\n", | |
" <td>2119</td>\n", | |
" <td>1999</td>\n", | |
" <td>82</td>\n", | |
" <td>2115</td>\n", | |
" <td>2115</td>\n", | |
" <td>2779</td>\n", | |
" <td>2772</td>\n", | |
" <td>2759</td>\n", | |
" <td>162.0</td>\n", | |
" <td>...</td>\n", | |
" <td>365</td>\n", | |
" <td>365</td>\n", | |
" <td>257</td>\n", | |
" <td>256</td>\n", | |
" <td>250</td>\n", | |
" <td>94.337</td>\n", | |
" <td>99.8112</td>\n", | |
" <td>99.8112</td>\n", | |
" <td>99.748111</td>\n", | |
" <td>99.280317</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>810</td>\n", | |
" <td>1932</td>\n", | |
" <td>1834</td>\n", | |
" <td>1</td>\n", | |
" <td>1928</td>\n", | |
" <td>1928</td>\n", | |
" <td>2154</td>\n", | |
" <td>2152</td>\n", | |
" <td>2152</td>\n", | |
" <td>166.0</td>\n", | |
" <td>...</td>\n", | |
" <td>417</td>\n", | |
" <td>417</td>\n", | |
" <td>275</td>\n", | |
" <td>273</td>\n", | |
" <td>273</td>\n", | |
" <td>94.9275</td>\n", | |
" <td>99.793</td>\n", | |
" <td>99.793</td>\n", | |
" <td>99.907149</td>\n", | |
" <td>99.907149</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>1397</td>\n", | |
" <td>2618</td>\n", | |
" <td>2557</td>\n", | |
" <td>11</td>\n", | |
" <td>2599</td>\n", | |
" <td>2599</td>\n", | |
" <td>3353</td>\n", | |
" <td>3320</td>\n", | |
" <td>3318</td>\n", | |
" <td>208.0</td>\n", | |
" <td>...</td>\n", | |
" <td>440</td>\n", | |
" <td>439</td>\n", | |
" <td>327</td>\n", | |
" <td>314</td>\n", | |
" <td>315</td>\n", | |
" <td>97.67</td>\n", | |
" <td>99.2743</td>\n", | |
" <td>99.2743</td>\n", | |
" <td>99.015807</td>\n", | |
" <td>98.956159</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"13\" valign=\"top\">2018</th>\n", | |
" <th>0</th>\n", | |
" <td>10759</td>\n", | |
" <td>21612</td>\n", | |
" <td>20660</td>\n", | |
" <td>16</td>\n", | |
" <td>21020</td>\n", | |
" <td>21018</td>\n", | |
" <td>23669</td>\n", | |
" <td>23128</td>\n", | |
" <td>23114</td>\n", | |
" <td>2420.0</td>\n", | |
" <td>...</td>\n", | |
" <td>4297</td>\n", | |
" <td>4306</td>\n", | |
" <td>3010</td>\n", | |
" <td>2995</td>\n", | |
" <td>3000</td>\n", | |
" <td>95.595</td>\n", | |
" <td>97.2608</td>\n", | |
" <td>97.2515</td>\n", | |
" <td>97.714310</td>\n", | |
" <td>97.655161</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>709</td>\n", | |
" <td>1320</td>\n", | |
" <td>1321</td>\n", | |
" <td>0</td>\n", | |
" <td>1303</td>\n", | |
" <td>1305</td>\n", | |
" <td>2124</td>\n", | |
" <td>2118</td>\n", | |
" <td>2115</td>\n", | |
" <td>115.0</td>\n", | |
" <td>...</td>\n", | |
" <td>199</td>\n", | |
" <td>199</td>\n", | |
" <td>178</td>\n", | |
" <td>178</td>\n", | |
" <td>178</td>\n", | |
" <td>100.076</td>\n", | |
" <td>98.7121</td>\n", | |
" <td>98.8636</td>\n", | |
" <td>99.717514</td>\n", | |
" <td>99.576271</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>863</td>\n", | |
" <td>1985</td>\n", | |
" <td>1959</td>\n", | |
" <td>0</td>\n", | |
" <td>1983</td>\n", | |
" <td>1983</td>\n", | |
" <td>2463</td>\n", | |
" <td>2456</td>\n", | |
" <td>2459</td>\n", | |
" <td>155.0</td>\n", | |
" <td>...</td>\n", | |
" <td>396</td>\n", | |
" <td>396</td>\n", | |
" <td>357</td>\n", | |
" <td>357</td>\n", | |
" <td>357</td>\n", | |
" <td>98.6902</td>\n", | |
" <td>99.8992</td>\n", | |
" <td>99.8992</td>\n", | |
" <td>99.715794</td>\n", | |
" <td>99.837596</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1089</td>\n", | |
" <td>2468</td>\n", | |
" <td>2449</td>\n", | |
" <td>0</td>\n", | |
" <td>2461</td>\n", | |
" <td>2460</td>\n", | |
" <td>3025</td>\n", | |
" <td>3014</td>\n", | |
" <td>3013</td>\n", | |
" <td>204.0</td>\n", | |
" <td>...</td>\n", | |
" <td>411</td>\n", | |
" <td>411</td>\n", | |
" <td>260</td>\n", | |
" <td>258</td>\n", | |
" <td>258</td>\n", | |
" <td>99.2301</td>\n", | |
" <td>99.7164</td>\n", | |
" <td>99.6759</td>\n", | |
" <td>99.636364</td>\n", | |
" <td>99.603306</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>988</td>\n", | |
" <td>1933</td>\n", | |
" <td>1918</td>\n", | |
" <td>1</td>\n", | |
" <td>1925</td>\n", | |
" <td>1928</td>\n", | |
" <td>2483</td>\n", | |
" <td>2451</td>\n", | |
" <td>2455</td>\n", | |
" <td>172.0</td>\n", | |
" <td>...</td>\n", | |
" <td>358</td>\n", | |
" <td>359</td>\n", | |
" <td>234</td>\n", | |
" <td>234</td>\n", | |
" <td>234</td>\n", | |
" <td>99.224</td>\n", | |
" <td>99.5861</td>\n", | |
" <td>99.7413</td>\n", | |
" <td>98.711236</td>\n", | |
" <td>98.872332</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>975</td>\n", | |
" <td>2133</td>\n", | |
" <td>2117</td>\n", | |
" <td>0</td>\n", | |
" <td>2128</td>\n", | |
" <td>2128</td>\n", | |
" <td>2857</td>\n", | |
" <td>2831</td>\n", | |
" <td>2828</td>\n", | |
" <td>236.0</td>\n", | |
" <td>...</td>\n", | |
" <td>560</td>\n", | |
" <td>560</td>\n", | |
" <td>403</td>\n", | |
" <td>396</td>\n", | |
" <td>376</td>\n", | |
" <td>99.2499</td>\n", | |
" <td>99.7656</td>\n", | |
" <td>99.7656</td>\n", | |
" <td>99.089954</td>\n", | |
" <td>98.984949</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>1155</td>\n", | |
" <td>2368</td>\n", | |
" <td>2338</td>\n", | |
" <td>0</td>\n", | |
" <td>2358</td>\n", | |
" <td>2358</td>\n", | |
" <td>3235</td>\n", | |
" <td>3231</td>\n", | |
" <td>3217</td>\n", | |
" <td>289.0</td>\n", | |
" <td>...</td>\n", | |
" <td>542</td>\n", | |
" <td>541</td>\n", | |
" <td>421</td>\n", | |
" <td>419</td>\n", | |
" <td>419</td>\n", | |
" <td>98.7331</td>\n", | |
" <td>99.5777</td>\n", | |
" <td>99.5777</td>\n", | |
" <td>99.876352</td>\n", | |
" <td>99.443586</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1230</td>\n", | |
" <td>2911</td>\n", | |
" <td>2901</td>\n", | |
" <td>0</td>\n", | |
" <td>2903</td>\n", | |
" <td>2910</td>\n", | |
" <td>4593</td>\n", | |
" <td>4585</td>\n", | |
" <td>4585</td>\n", | |
" <td>259.0</td>\n", | |
" <td>...</td>\n", | |
" <td>574</td>\n", | |
" <td>581</td>\n", | |
" <td>365</td>\n", | |
" <td>365</td>\n", | |
" <td>365</td>\n", | |
" <td>99.6565</td>\n", | |
" <td>99.7252</td>\n", | |
" <td>99.9656</td>\n", | |
" <td>99.825822</td>\n", | |
" <td>99.825822</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1114</td>\n", | |
" <td>2144</td>\n", | |
" <td>2107</td>\n", | |
" <td>0</td>\n", | |
" <td>2126</td>\n", | |
" <td>2125</td>\n", | |
" <td>2814</td>\n", | |
" <td>2802</td>\n", | |
" <td>2791</td>\n", | |
" <td>266.0</td>\n", | |
" <td>...</td>\n", | |
" <td>540</td>\n", | |
" <td>540</td>\n", | |
" <td>415</td>\n", | |
" <td>410</td>\n", | |
" <td>414</td>\n", | |
" <td>98.2743</td>\n", | |
" <td>99.1604</td>\n", | |
" <td>99.1138</td>\n", | |
" <td>99.573561</td>\n", | |
" <td>99.182658</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>788</td>\n", | |
" <td>1836</td>\n", | |
" <td>1815</td>\n", | |
" <td>0</td>\n", | |
" <td>1765</td>\n", | |
" <td>1768</td>\n", | |
" <td>1918</td>\n", | |
" <td>1913</td>\n", | |
" <td>1908</td>\n", | |
" <td>192.0</td>\n", | |
" <td>...</td>\n", | |
" <td>394</td>\n", | |
" <td>394</td>\n", | |
" <td>252</td>\n", | |
" <td>252</td>\n", | |
" <td>252</td>\n", | |
" <td>98.8562</td>\n", | |
" <td>96.1329</td>\n", | |
" <td>96.2963</td>\n", | |
" <td>99.739312</td>\n", | |
" <td>99.478624</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>1302</td>\n", | |
" <td>3186</td>\n", | |
" <td>3172</td>\n", | |
" <td>0</td>\n", | |
" <td>3178</td>\n", | |
" <td>3178</td>\n", | |
" <td>4046</td>\n", | |
" <td>4042</td>\n", | |
" <td>4020</td>\n", | |
" <td>222.0</td>\n", | |
" <td>...</td>\n", | |
" <td>519</td>\n", | |
" <td>519</td>\n", | |
" <td>287</td>\n", | |
" <td>283</td>\n", | |
" <td>283</td>\n", | |
" <td>99.5606</td>\n", | |
" <td>99.7489</td>\n", | |
" <td>99.7489</td>\n", | |
" <td>99.901137</td>\n", | |
" <td>99.357390</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>1256</td>\n", | |
" <td>2710</td>\n", | |
" <td>2662</td>\n", | |
" <td>4</td>\n", | |
" <td>2702</td>\n", | |
" <td>2703</td>\n", | |
" <td>3309</td>\n", | |
" <td>3282</td>\n", | |
" <td>3294</td>\n", | |
" <td>276.0</td>\n", | |
" <td>...</td>\n", | |
" <td>661</td>\n", | |
" <td>660</td>\n", | |
" <td>414</td>\n", | |
" <td>407</td>\n", | |
" <td>408</td>\n", | |
" <td>98.2288</td>\n", | |
" <td>99.7048</td>\n", | |
" <td>99.7417</td>\n", | |
" <td>99.184044</td>\n", | |
" <td>99.546691</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>1044</td>\n", | |
" <td>2125</td>\n", | |
" <td>2125</td>\n", | |
" <td>0</td>\n", | |
" <td>2099</td>\n", | |
" <td>2101</td>\n", | |
" <td>2925</td>\n", | |
" <td>2906</td>\n", | |
" <td>2903</td>\n", | |
" <td>196.0</td>\n", | |
" <td>...</td>\n", | |
" <td>454</td>\n", | |
" <td>454</td>\n", | |
" <td>388</td>\n", | |
" <td>387</td>\n", | |
" <td>388</td>\n", | |
" <td>100</td>\n", | |
" <td>98.7765</td>\n", | |
" <td>98.8706</td>\n", | |
" <td>99.350427</td>\n", | |
" <td>99.247863</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"14\" valign=\"top\">2019</th>\n", | |
" <th>0</th>\n", | |
" <td>1364</td>\n", | |
" <td>2761</td>\n", | |
" <td>2701</td>\n", | |
" <td>0</td>\n", | |
" <td>2672</td>\n", | |
" <td>2672</td>\n", | |
" <td>3199</td>\n", | |
" <td>3162</td>\n", | |
" <td>3165</td>\n", | |
" <td>325.0</td>\n", | |
" <td>...</td>\n", | |
" <td>639</td>\n", | |
" <td>639</td>\n", | |
" <td>433</td>\n", | |
" <td>432</td>\n", | |
" <td>432</td>\n", | |
" <td>97.8269</td>\n", | |
" <td>96.7765</td>\n", | |
" <td>96.7765</td>\n", | |
" <td>98.843389</td>\n", | |
" <td>98.937168</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>800</td>\n", | |
" <td>1604</td>\n", | |
" <td>1600</td>\n", | |
" <td>0</td>\n", | |
" <td>1560</td>\n", | |
" <td>1561</td>\n", | |
" <td>1899</td>\n", | |
" <td>1887</td>\n", | |
" <td>1861</td>\n", | |
" <td>249.0</td>\n", | |
" <td>...</td>\n", | |
" <td>487</td>\n", | |
" <td>488</td>\n", | |
" <td>335</td>\n", | |
" <td>333</td>\n", | |
" <td>333</td>\n", | |
" <td>99.7506</td>\n", | |
" <td>97.2569</td>\n", | |
" <td>97.3192</td>\n", | |
" <td>99.368088</td>\n", | |
" <td>97.998947</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1086</td>\n", | |
" <td>2332</td>\n", | |
" <td>2308</td>\n", | |
" <td>0</td>\n", | |
" <td>2324</td>\n", | |
" <td>2325</td>\n", | |
" <td>2430</td>\n", | |
" <td>2423</td>\n", | |
" <td>2417</td>\n", | |
" <td>296.0</td>\n", | |
" <td>...</td>\n", | |
" <td>630</td>\n", | |
" <td>630</td>\n", | |
" <td>397</td>\n", | |
" <td>395</td>\n", | |
" <td>395</td>\n", | |
" <td>98.9708</td>\n", | |
" <td>99.6569</td>\n", | |
" <td>99.6998</td>\n", | |
" <td>99.711934</td>\n", | |
" <td>99.465021</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>830</td>\n", | |
" <td>1691</td>\n", | |
" <td>1694</td>\n", | |
" <td>0</td>\n", | |
" <td>1685</td>\n", | |
" <td>1685</td>\n", | |
" <td>2353</td>\n", | |
" <td>2301</td>\n", | |
" <td>2306</td>\n", | |
" <td>223.0</td>\n", | |
" <td>...</td>\n", | |
" <td>500</td>\n", | |
" <td>500</td>\n", | |
" <td>321</td>\n", | |
" <td>320</td>\n", | |
" <td>320</td>\n", | |
" <td>100.177</td>\n", | |
" <td>99.6452</td>\n", | |
" <td>99.6452</td>\n", | |
" <td>97.790055</td>\n", | |
" <td>98.002550</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1853</td>\n", | |
" <td>3676</td>\n", | |
" <td>3682</td>\n", | |
" <td>0</td>\n", | |
" <td>3632</td>\n", | |
" <td>3640</td>\n", | |
" <td>4342</td>\n", | |
" <td>4318</td>\n", | |
" <td>4299</td>\n", | |
" <td>430.0</td>\n", | |
" <td>...</td>\n", | |
" <td>839</td>\n", | |
" <td>839</td>\n", | |
" <td>574</td>\n", | |
" <td>566</td>\n", | |
" <td>561</td>\n", | |
" <td>100.163</td>\n", | |
" <td>98.803</td>\n", | |
" <td>99.0207</td>\n", | |
" <td>99.447259</td>\n", | |
" <td>99.009673</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2023</td>\n", | |
" <td>4339</td>\n", | |
" <td>4211</td>\n", | |
" <td>0</td>\n", | |
" <td>4270</td>\n", | |
" <td>4274</td>\n", | |
" <td>4819</td>\n", | |
" <td>4769</td>\n", | |
" <td>4773</td>\n", | |
" <td>393.0</td>\n", | |
" <td>...</td>\n", | |
" <td>770</td>\n", | |
" <td>772</td>\n", | |
" <td>654</td>\n", | |
" <td>650</td>\n", | |
" <td>649</td>\n", | |
" <td>97.05</td>\n", | |
" <td>98.4098</td>\n", | |
" <td>98.502</td>\n", | |
" <td>98.962440</td>\n", | |
" <td>99.045445</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>1593</td>\n", | |
" <td>3705</td>\n", | |
" <td>3621</td>\n", | |
" <td>0</td>\n", | |
" <td>3632</td>\n", | |
" <td>3618</td>\n", | |
" <td>4458</td>\n", | |
" <td>4436</td>\n", | |
" <td>4433</td>\n", | |
" <td>370.0</td>\n", | |
" <td>...</td>\n", | |
" <td>694</td>\n", | |
" <td>694</td>\n", | |
" <td>506</td>\n", | |
" <td>505</td>\n", | |
" <td>506</td>\n", | |
" <td>97.7328</td>\n", | |
" <td>98.0297</td>\n", | |
" <td>97.6518</td>\n", | |
" <td>99.506505</td>\n", | |
" <td>99.439210</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1969</td>\n", | |
" <td>4328</td>\n", | |
" <td>4327</td>\n", | |
" <td>0</td>\n", | |
" <td>4238</td>\n", | |
" <td>4243</td>\n", | |
" <td>4547</td>\n", | |
" <td>4511</td>\n", | |
" <td>4541</td>\n", | |
" <td>448.0</td>\n", | |
" <td>...</td>\n", | |
" <td>960</td>\n", | |
" <td>962</td>\n", | |
" <td>767</td>\n", | |
" <td>753</td>\n", | |
" <td>754</td>\n", | |
" <td>99.9769</td>\n", | |
" <td>97.9205</td>\n", | |
" <td>98.036</td>\n", | |
" <td>99.208269</td>\n", | |
" <td>99.868045</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1888</td>\n", | |
" <td>4128</td>\n", | |
" <td>4038</td>\n", | |
" <td>0</td>\n", | |
" <td>4052</td>\n", | |
" <td>4043</td>\n", | |
" <td>4706</td>\n", | |
" <td>4627</td>\n", | |
" <td>4668</td>\n", | |
" <td>497.0</td>\n", | |
" <td>...</td>\n", | |
" <td>933</td>\n", | |
" <td>934</td>\n", | |
" <td>790</td>\n", | |
" <td>785</td>\n", | |
" <td>785</td>\n", | |
" <td>97.8198</td>\n", | |
" <td>98.1589</td>\n", | |
" <td>97.9409</td>\n", | |
" <td>98.321292</td>\n", | |
" <td>99.192520</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>2314</td>\n", | |
" <td>4947</td>\n", | |
" <td>4794</td>\n", | |
" <td>0</td>\n", | |
" <td>4913</td>\n", | |
" <td>4915</td>\n", | |
" <td>5964</td>\n", | |
" <td>5903</td>\n", | |
" <td>5905</td>\n", | |
" <td>455.0</td>\n", | |
" <td>...</td>\n", | |
" <td>939</td>\n", | |
" <td>939</td>\n", | |
" <td>704</td>\n", | |
" <td>701</td>\n", | |
" <td>702</td>\n", | |
" <td>96.9072</td>\n", | |
" <td>99.3127</td>\n", | |
" <td>99.3531</td>\n", | |
" <td>98.977197</td>\n", | |
" <td>99.010731</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>2197</td>\n", | |
" <td>4833</td>\n", | |
" <td>4824</td>\n", | |
" <td>0</td>\n", | |
" <td>4817</td>\n", | |
" <td>4814</td>\n", | |
" <td>5061</td>\n", | |
" <td>5040</td>\n", | |
" <td>5037</td>\n", | |
" <td>517.0</td>\n", | |
" <td>...</td>\n", | |
" <td>1158</td>\n", | |
" <td>1158</td>\n", | |
" <td>573</td>\n", | |
" <td>569</td>\n", | |
" <td>569</td>\n", | |
" <td>99.8138</td>\n", | |
" <td>99.6689</td>\n", | |
" <td>99.6069</td>\n", | |
" <td>99.585062</td>\n", | |
" <td>99.525785</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>2203</td>\n", | |
" <td>4899</td>\n", | |
" <td>4794</td>\n", | |
" <td>0</td>\n", | |
" <td>4800</td>\n", | |
" <td>4800</td>\n", | |
" <td>5618</td>\n", | |
" <td>5540</td>\n", | |
" <td>5544</td>\n", | |
" <td>448.0</td>\n", | |
" <td>...</td>\n", | |
" <td>913</td>\n", | |
" <td>911</td>\n", | |
" <td>748</td>\n", | |
" <td>739</td>\n", | |
" <td>743</td>\n", | |
" <td>97.8567</td>\n", | |
" <td>97.9792</td>\n", | |
" <td>97.9792</td>\n", | |
" <td>98.611606</td>\n", | |
" <td>98.682805</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>2498</td>\n", | |
" <td>5644</td>\n", | |
" <td>5475</td>\n", | |
" <td>0</td>\n", | |
" <td>5369</td>\n", | |
" <td>5095</td>\n", | |
" <td>5992</td>\n", | |
" <td>5947</td>\n", | |
" <td>5943</td>\n", | |
" <td>699.0</td>\n", | |
" <td>...</td>\n", | |
" <td>1350</td>\n", | |
" <td>1351</td>\n", | |
" <td>1195</td>\n", | |
" <td>1191</td>\n", | |
" <td>1190</td>\n", | |
" <td>97.0057</td>\n", | |
" <td>95.1276</td>\n", | |
" <td>90.2729</td>\n", | |
" <td>99.248999</td>\n", | |
" <td>99.182243</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>109</th>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td>100.000000</td>\n", | |
" <td>100.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"9\" valign=\"top\">2020</th>\n", | |
" <th>0</th>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>8</td>\n", | |
" <td>8</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>100</td>\n", | |
" <td>100</td>\n", | |
" <td>100</td>\n", | |
" <td>100.000000</td>\n", | |
" <td>100.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1336</td>\n", | |
" <td>2606</td>\n", | |
" <td>2629</td>\n", | |
" <td>0</td>\n", | |
" <td>2539</td>\n", | |
" <td>2541</td>\n", | |
" <td>3454</td>\n", | |
" <td>3350</td>\n", | |
" <td>3316</td>\n", | |
" <td>312.0</td>\n", | |
" <td>...</td>\n", | |
" <td>633</td>\n", | |
" <td>633</td>\n", | |
" <td>422</td>\n", | |
" <td>421</td>\n", | |
" <td>421</td>\n", | |
" <td>100.883</td>\n", | |
" <td>97.429</td>\n", | |
" <td>97.5058</td>\n", | |
" <td>96.988998</td>\n", | |
" <td>96.004632</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1267</td>\n", | |
" <td>2717</td>\n", | |
" <td>2620</td>\n", | |
" <td>0</td>\n", | |
" <td>2698</td>\n", | |
" <td>2697</td>\n", | |
" <td>2689</td>\n", | |
" <td>2668</td>\n", | |
" <td>2667</td>\n", | |
" <td>334.0</td>\n", | |
" <td>...</td>\n", | |
" <td>683</td>\n", | |
" <td>683</td>\n", | |
" <td>389</td>\n", | |
" <td>387</td>\n", | |
" <td>387</td>\n", | |
" <td>96.4299</td>\n", | |
" <td>99.3007</td>\n", | |
" <td>99.2639</td>\n", | |
" <td>99.219041</td>\n", | |
" <td>99.181852</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1849</td>\n", | |
" <td>3736</td>\n", | |
" <td>3726</td>\n", | |
" <td>0</td>\n", | |
" <td>3689</td>\n", | |
" <td>3665</td>\n", | |
" <td>4318</td>\n", | |
" <td>4281</td>\n", | |
" <td>4292</td>\n", | |
" <td>525.0</td>\n", | |
" <td>...</td>\n", | |
" <td>1047</td>\n", | |
" <td>1047</td>\n", | |
" <td>812</td>\n", | |
" <td>811</td>\n", | |
" <td>811</td>\n", | |
" <td>99.7323</td>\n", | |
" <td>98.742</td>\n", | |
" <td>98.0996</td>\n", | |
" <td>99.143122</td>\n", | |
" <td>99.397869</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1691</td>\n", | |
" <td>3602</td>\n", | |
" <td>3603</td>\n", | |
" <td>0</td>\n", | |
" <td>3530</td>\n", | |
" <td>3530</td>\n", | |
" <td>4686</td>\n", | |
" <td>4625</td>\n", | |
" <td>4570</td>\n", | |
" <td>366.0</td>\n", | |
" <td>...</td>\n", | |
" <td>592</td>\n", | |
" <td>592</td>\n", | |
" <td>521</td>\n", | |
" <td>485</td>\n", | |
" <td>452</td>\n", | |
" <td>100.028</td>\n", | |
" <td>98.0011</td>\n", | |
" <td>98.0011</td>\n", | |
" <td>98.698250</td>\n", | |
" <td>97.524541</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1938</td>\n", | |
" <td>3947</td>\n", | |
" <td>3834</td>\n", | |
" <td>0</td>\n", | |
" <td>3904</td>\n", | |
" <td>3889</td>\n", | |
" <td>4681</td>\n", | |
" <td>4655</td>\n", | |
" <td>4642</td>\n", | |
" <td>506.0</td>\n", | |
" <td>...</td>\n", | |
" <td>927</td>\n", | |
" <td>927</td>\n", | |
" <td>703</td>\n", | |
" <td>697</td>\n", | |
" <td>697</td>\n", | |
" <td>97.1371</td>\n", | |
" <td>98.9106</td>\n", | |
" <td>98.5305</td>\n", | |
" <td>99.444563</td>\n", | |
" <td>99.166845</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2360</td>\n", | |
" <td>4582</td>\n", | |
" <td>4578</td>\n", | |
" <td>0</td>\n", | |
" <td>4551</td>\n", | |
" <td>4545</td>\n", | |
" <td>5439</td>\n", | |
" <td>5390</td>\n", | |
" <td>5364</td>\n", | |
" <td>596.0</td>\n", | |
" <td>...</td>\n", | |
" <td>1205</td>\n", | |
" <td>1205</td>\n", | |
" <td>894</td>\n", | |
" <td>888</td>\n", | |
" <td>888</td>\n", | |
" <td>99.9127</td>\n", | |
" <td>99.3234</td>\n", | |
" <td>99.1925</td>\n", | |
" <td>99.099099</td>\n", | |
" <td>98.621070</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>2530</td>\n", | |
" <td>5339</td>\n", | |
" <td>5198</td>\n", | |
" <td>0</td>\n", | |
" <td>5295</td>\n", | |
" <td>5295</td>\n", | |
" <td>6698</td>\n", | |
" <td>6646</td>\n", | |
" <td>6635</td>\n", | |
" <td>612.0</td>\n", | |
" <td>...</td>\n", | |
" <td>1228</td>\n", | |
" <td>1229</td>\n", | |
" <td>787</td>\n", | |
" <td>783</td>\n", | |
" <td>784</td>\n", | |
" <td>97.3591</td>\n", | |
" <td>99.1759</td>\n", | |
" <td>99.1759</td>\n", | |
" <td>99.223649</td>\n", | |
" <td>99.059421</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1517</td>\n", | |
" <td>2862</td>\n", | |
" <td>2863</td>\n", | |
" <td>0</td>\n", | |
" <td>2766</td>\n", | |
" <td>2772</td>\n", | |
" <td>3659</td>\n", | |
" <td>3630</td>\n", | |
" <td>3624</td>\n", | |
" <td>375.0</td>\n", | |
" <td>...</td>\n", | |
" <td>682</td>\n", | |
" <td>687</td>\n", | |
" <td>521</td>\n", | |
" <td>514</td>\n", | |
" <td>515</td>\n", | |
" <td>100.035</td>\n", | |
" <td>96.6457</td>\n", | |
" <td>96.8553</td>\n", | |
" <td>99.207434</td>\n", | |
" <td>99.043454</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>49 rows × 21 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" total_docs table_wrap_count table_wrap_table_count \\\n", | |
"year month \n", | |
"2017 0 10937 21465 18827 \n", | |
" 1 539 1135 978 \n", | |
" 2 586 1440 1227 \n", | |
" 3 1083 2569 2026 \n", | |
" 4 1043 2219 1773 \n", | |
" 5 960 2156 1759 \n", | |
" 6 1144 2741 2253 \n", | |
" 7 930 2067 1641 \n", | |
" 8 1187 2573 2186 \n", | |
" 9 827 1929 1644 \n", | |
" 10 892 2119 1999 \n", | |
" 11 810 1932 1834 \n", | |
" 12 1397 2618 2557 \n", | |
"2018 0 10759 21612 20660 \n", | |
" 1 709 1320 1321 \n", | |
" 2 863 1985 1959 \n", | |
" 3 1089 2468 2449 \n", | |
" 4 988 1933 1918 \n", | |
" 5 975 2133 2117 \n", | |
" 6 1155 2368 2338 \n", | |
" 7 1230 2911 2901 \n", | |
" 8 1114 2144 2107 \n", | |
" 9 788 1836 1815 \n", | |
" 10 1302 3186 3172 \n", | |
" 11 1256 2710 2662 \n", | |
" 12 1044 2125 2125 \n", | |
"2019 0 1364 2761 2701 \n", | |
" 1 800 1604 1600 \n", | |
" 2 1086 2332 2308 \n", | |
" 3 830 1691 1694 \n", | |
" 4 1853 3676 3682 \n", | |
" 5 2023 4339 4211 \n", | |
" 6 1593 3705 3621 \n", | |
" 7 1969 4328 4327 \n", | |
" 8 1888 4128 4038 \n", | |
" 9 2314 4947 4794 \n", | |
" 10 2197 4833 4824 \n", | |
" 11 2203 4899 4794 \n", | |
" 12 2498 5644 5475 \n", | |
" 109 1 0 0 \n", | |
"2020 0 2 4 4 \n", | |
" 1 1336 2606 2629 \n", | |
" 2 1267 2717 2620 \n", | |
" 3 1849 3736 3726 \n", | |
" 4 1691 3602 3603 \n", | |
" 5 1938 3947 3834 \n", | |
" 6 2360 4582 4578 \n", | |
" 7 2530 5339 5198 \n", | |
" 8 1517 2862 2863 \n", | |
"\n", | |
" table_wrap_graphic_count table_wrap_label_count \\\n", | |
"year month \n", | |
"2017 0 2009 20818 \n", | |
" 1 10 1103 \n", | |
" 2 164 1415 \n", | |
" 3 405 2549 \n", | |
" 4 361 2211 \n", | |
" 5 310 2150 \n", | |
" 6 468 2709 \n", | |
" 7 351 2064 \n", | |
" 8 266 2536 \n", | |
" 9 250 1928 \n", | |
" 10 82 2115 \n", | |
" 11 1 1928 \n", | |
" 12 11 2599 \n", | |
"2018 0 16 21020 \n", | |
" 1 0 1303 \n", | |
" 2 0 1983 \n", | |
" 3 0 2461 \n", | |
" 4 1 1925 \n", | |
" 5 0 2128 \n", | |
" 6 0 2358 \n", | |
" 7 0 2903 \n", | |
" 8 0 2126 \n", | |
" 9 0 1765 \n", | |
" 10 0 3178 \n", | |
" 11 4 2702 \n", | |
" 12 0 2099 \n", | |
"2019 0 0 2672 \n", | |
" 1 0 1560 \n", | |
" 2 0 2324 \n", | |
" 3 0 1685 \n", | |
" 4 0 3632 \n", | |
" 5 0 4270 \n", | |
" 6 0 3632 \n", | |
" 7 0 4238 \n", | |
" 8 0 4052 \n", | |
" 9 0 4913 \n", | |
" 10 0 4817 \n", | |
" 11 0 4800 \n", | |
" 12 0 5369 \n", | |
" 109 0 0 \n", | |
"2020 0 0 4 \n", | |
" 1 0 2539 \n", | |
" 2 0 2698 \n", | |
" 3 0 3689 \n", | |
" 4 0 3530 \n", | |
" 5 0 3904 \n", | |
" 6 0 4551 \n", | |
" 7 0 5295 \n", | |
" 8 0 2766 \n", | |
"\n", | |
" table_wrap_caption_title_count fig_count fig_label_count \\\n", | |
"year month \n", | |
"2017 0 20936 24384 23942 \n", | |
" 1 1103 1507 1495 \n", | |
" 2 1414 1564 1547 \n", | |
" 3 2548 3106 3098 \n", | |
" 4 2211 2502 2456 \n", | |
" 5 2145 2418 2400 \n", | |
" 6 2712 3560 3540 \n", | |
" 7 2064 2434 2420 \n", | |
" 8 2536 3127 3110 \n", | |
" 9 1928 2548 2523 \n", | |
" 10 2115 2779 2772 \n", | |
" 11 1928 2154 2152 \n", | |
" 12 2599 3353 3320 \n", | |
"2018 0 21018 23669 23128 \n", | |
" 1 1305 2124 2118 \n", | |
" 2 1983 2463 2456 \n", | |
" 3 2460 3025 3014 \n", | |
" 4 1928 2483 2451 \n", | |
" 5 2128 2857 2831 \n", | |
" 6 2358 3235 3231 \n", | |
" 7 2910 4593 4585 \n", | |
" 8 2125 2814 2802 \n", | |
" 9 1768 1918 1913 \n", | |
" 10 3178 4046 4042 \n", | |
" 11 2703 3309 3282 \n", | |
" 12 2101 2925 2906 \n", | |
"2019 0 2672 3199 3162 \n", | |
" 1 1561 1899 1887 \n", | |
" 2 2325 2430 2423 \n", | |
" 3 1685 2353 2301 \n", | |
" 4 3640 4342 4318 \n", | |
" 5 4274 4819 4769 \n", | |
" 6 3618 4458 4436 \n", | |
" 7 4243 4547 4511 \n", | |
" 8 4043 4706 4627 \n", | |
" 9 4915 5964 5903 \n", | |
" 10 4814 5061 5040 \n", | |
" 11 4800 5618 5540 \n", | |
" 12 5095 5992 5947 \n", | |
" 109 0 4 4 \n", | |
"2020 0 4 8 8 \n", | |
" 1 2541 3454 3350 \n", | |
" 2 2697 2689 2668 \n", | |
" 3 3665 4318 4281 \n", | |
" 4 3530 4686 4625 \n", | |
" 5 3889 4681 4655 \n", | |
" 6 4545 5439 5390 \n", | |
" 7 5295 6698 6646 \n", | |
" 8 2772 3659 3630 \n", | |
"\n", | |
" fig_caption_title_count has_subarticle_translation ... \\\n", | |
"year month ... \n", | |
"2017 0 23923 2605.0 ... \n", | |
" 1 1493 111.0 ... \n", | |
" 2 1550 107.0 ... \n", | |
" 3 3102 196.0 ... \n", | |
" 4 2464 178.0 ... \n", | |
" 5 2401 140.0 ... \n", | |
" 6 3515 240.0 ... \n", | |
" 7 2421 199.0 ... \n", | |
" 8 3108 178.0 ... \n", | |
" 9 2520 209.0 ... \n", | |
" 10 2759 162.0 ... \n", | |
" 11 2152 166.0 ... \n", | |
" 12 3318 208.0 ... \n", | |
"2018 0 23114 2420.0 ... \n", | |
" 1 2115 115.0 ... \n", | |
" 2 2459 155.0 ... \n", | |
" 3 3013 204.0 ... \n", | |
" 4 2455 172.0 ... \n", | |
" 5 2828 236.0 ... \n", | |
" 6 3217 289.0 ... \n", | |
" 7 4585 259.0 ... \n", | |
" 8 2791 266.0 ... \n", | |
" 9 1908 192.0 ... \n", | |
" 10 4020 222.0 ... \n", | |
" 11 3294 276.0 ... \n", | |
" 12 2903 196.0 ... \n", | |
"2019 0 3165 325.0 ... \n", | |
" 1 1861 249.0 ... \n", | |
" 2 2417 296.0 ... \n", | |
" 3 2306 223.0 ... \n", | |
" 4 4299 430.0 ... \n", | |
" 5 4773 393.0 ... \n", | |
" 6 4433 370.0 ... \n", | |
" 7 4541 448.0 ... \n", | |
" 8 4668 497.0 ... \n", | |
" 9 5905 455.0 ... \n", | |
" 10 5037 517.0 ... \n", | |
" 11 5544 448.0 ... \n", | |
" 12 5943 699.0 ... \n", | |
" 109 4 0.0 ... \n", | |
"2020 0 8 0.0 ... \n", | |
" 1 3316 312.0 ... \n", | |
" 2 2667 334.0 ... \n", | |
" 3 4292 525.0 ... \n", | |
" 4 4570 366.0 ... \n", | |
" 5 4642 506.0 ... \n", | |
" 6 5364 596.0 ... \n", | |
" 7 6635 612.0 ... \n", | |
" 8 3624 375.0 ... \n", | |
"\n", | |
" subarticle_translation_table_wrap_label_count \\\n", | |
"year month \n", | |
"2017 0 4884 \n", | |
" 1 209 \n", | |
" 2 277 \n", | |
" 3 489 \n", | |
" 4 441 \n", | |
" 5 360 \n", | |
" 6 562 \n", | |
" 7 389 \n", | |
" 8 343 \n", | |
" 9 486 \n", | |
" 10 365 \n", | |
" 11 417 \n", | |
" 12 440 \n", | |
"2018 0 4297 \n", | |
" 1 199 \n", | |
" 2 396 \n", | |
" 3 411 \n", | |
" 4 358 \n", | |
" 5 560 \n", | |
" 6 542 \n", | |
" 7 574 \n", | |
" 8 540 \n", | |
" 9 394 \n", | |
" 10 519 \n", | |
" 11 661 \n", | |
" 12 454 \n", | |
"2019 0 639 \n", | |
" 1 487 \n", | |
" 2 630 \n", | |
" 3 500 \n", | |
" 4 839 \n", | |
" 5 770 \n", | |
" 6 694 \n", | |
" 7 960 \n", | |
" 8 933 \n", | |
" 9 939 \n", | |
" 10 1158 \n", | |
" 11 913 \n", | |
" 12 1350 \n", | |
" 109 0 \n", | |
"2020 0 0 \n", | |
" 1 633 \n", | |
" 2 683 \n", | |
" 3 1047 \n", | |
" 4 592 \n", | |
" 5 927 \n", | |
" 6 1205 \n", | |
" 7 1228 \n", | |
" 8 682 \n", | |
"\n", | |
" subarticle_translation_table_wrap_caption_title_count \\\n", | |
"year month \n", | |
"2017 0 4945 \n", | |
" 1 209 \n", | |
" 2 277 \n", | |
" 3 489 \n", | |
" 4 441 \n", | |
" 5 360 \n", | |
" 6 565 \n", | |
" 7 388 \n", | |
" 8 342 \n", | |
" 9 486 \n", | |
" 10 365 \n", | |
" 11 417 \n", | |
" 12 439 \n", | |
"2018 0 4306 \n", | |
" 1 199 \n", | |
" 2 396 \n", | |
" 3 411 \n", | |
" 4 359 \n", | |
" 5 560 \n", | |
" 6 541 \n", | |
" 7 581 \n", | |
" 8 540 \n", | |
" 9 394 \n", | |
" 10 519 \n", | |
" 11 660 \n", | |
" 12 454 \n", | |
"2019 0 639 \n", | |
" 1 488 \n", | |
" 2 630 \n", | |
" 3 500 \n", | |
" 4 839 \n", | |
" 5 772 \n", | |
" 6 694 \n", | |
" 7 962 \n", | |
" 8 934 \n", | |
" 9 939 \n", | |
" 10 1158 \n", | |
" 11 911 \n", | |
" 12 1351 \n", | |
" 109 0 \n", | |
"2020 0 0 \n", | |
" 1 633 \n", | |
" 2 683 \n", | |
" 3 1047 \n", | |
" 4 592 \n", | |
" 5 927 \n", | |
" 6 1205 \n", | |
" 7 1229 \n", | |
" 8 687 \n", | |
"\n", | |
" subarticle_translation_fig_count \\\n", | |
"year month \n", | |
"2017 0 3705 \n", | |
" 1 131 \n", | |
" 2 225 \n", | |
" 3 280 \n", | |
" 4 367 \n", | |
" 5 184 \n", | |
" 6 340 \n", | |
" 7 206 \n", | |
" 8 265 \n", | |
" 9 351 \n", | |
" 10 257 \n", | |
" 11 275 \n", | |
" 12 327 \n", | |
"2018 0 3010 \n", | |
" 1 178 \n", | |
" 2 357 \n", | |
" 3 260 \n", | |
" 4 234 \n", | |
" 5 403 \n", | |
" 6 421 \n", | |
" 7 365 \n", | |
" 8 415 \n", | |
" 9 252 \n", | |
" 10 287 \n", | |
" 11 414 \n", | |
" 12 388 \n", | |
"2019 0 433 \n", | |
" 1 335 \n", | |
" 2 397 \n", | |
" 3 321 \n", | |
" 4 574 \n", | |
" 5 654 \n", | |
" 6 506 \n", | |
" 7 767 \n", | |
" 8 790 \n", | |
" 9 704 \n", | |
" 10 573 \n", | |
" 11 748 \n", | |
" 12 1195 \n", | |
" 109 0 \n", | |
"2020 0 0 \n", | |
" 1 422 \n", | |
" 2 389 \n", | |
" 3 812 \n", | |
" 4 521 \n", | |
" 5 703 \n", | |
" 6 894 \n", | |
" 7 787 \n", | |
" 8 521 \n", | |
"\n", | |
" subarticle_translation_fig_label_count \\\n", | |
"year month \n", | |
"2017 0 3669 \n", | |
" 1 129 \n", | |
" 2 220 \n", | |
" 3 280 \n", | |
" 4 367 \n", | |
" 5 184 \n", | |
" 6 338 \n", | |
" 7 204 \n", | |
" 8 265 \n", | |
" 9 346 \n", | |
" 10 256 \n", | |
" 11 273 \n", | |
" 12 314 \n", | |
"2018 0 2995 \n", | |
" 1 178 \n", | |
" 2 357 \n", | |
" 3 258 \n", | |
" 4 234 \n", | |
" 5 396 \n", | |
" 6 419 \n", | |
" 7 365 \n", | |
" 8 410 \n", | |
" 9 252 \n", | |
" 10 283 \n", | |
" 11 407 \n", | |
" 12 387 \n", | |
"2019 0 432 \n", | |
" 1 333 \n", | |
" 2 395 \n", | |
" 3 320 \n", | |
" 4 566 \n", | |
" 5 650 \n", | |
" 6 505 \n", | |
" 7 753 \n", | |
" 8 785 \n", | |
" 9 701 \n", | |
" 10 569 \n", | |
" 11 739 \n", | |
" 12 1191 \n", | |
" 109 0 \n", | |
"2020 0 0 \n", | |
" 1 421 \n", | |
" 2 387 \n", | |
" 3 811 \n", | |
" 4 485 \n", | |
" 5 697 \n", | |
" 6 888 \n", | |
" 7 783 \n", | |
" 8 514 \n", | |
"\n", | |
" subarticle_translation_fig_caption_title_count \\\n", | |
"year month \n", | |
"2017 0 3672 \n", | |
" 1 129 \n", | |
" 2 220 \n", | |
" 3 280 \n", | |
" 4 367 \n", | |
" 5 182 \n", | |
" 6 337 \n", | |
" 7 205 \n", | |
" 8 265 \n", | |
" 9 346 \n", | |
" 10 250 \n", | |
" 11 273 \n", | |
" 12 315 \n", | |
"2018 0 3000 \n", | |
" 1 178 \n", | |
" 2 357 \n", | |
" 3 258 \n", | |
" 4 234 \n", | |
" 5 376 \n", | |
" 6 419 \n", | |
" 7 365 \n", | |
" 8 414 \n", | |
" 9 252 \n", | |
" 10 283 \n", | |
" 11 408 \n", | |
" 12 388 \n", | |
"2019 0 432 \n", | |
" 1 333 \n", | |
" 2 395 \n", | |
" 3 320 \n", | |
" 4 561 \n", | |
" 5 649 \n", | |
" 6 506 \n", | |
" 7 754 \n", | |
" 8 785 \n", | |
" 9 702 \n", | |
" 10 569 \n", | |
" 11 743 \n", | |
" 12 1190 \n", | |
" 109 0 \n", | |
"2020 0 0 \n", | |
" 1 421 \n", | |
" 2 387 \n", | |
" 3 811 \n", | |
" 4 452 \n", | |
" 5 697 \n", | |
" 6 888 \n", | |
" 7 784 \n", | |
" 8 515 \n", | |
"\n", | |
" percentage_html_tables percentage_tables_with_labels \\\n", | |
"year month \n", | |
"2017 0 87.7102 96.9858 \n", | |
" 1 86.1674 97.1806 \n", | |
" 2 85.2083 98.2639 \n", | |
" 3 78.8634 99.2215 \n", | |
" 4 79.9009 99.6395 \n", | |
" 5 81.5863 99.7217 \n", | |
" 6 82.1963 98.8325 \n", | |
" 7 79.3904 99.8549 \n", | |
" 8 84.9592 98.562 \n", | |
" 9 85.2255 99.9482 \n", | |
" 10 94.337 99.8112 \n", | |
" 11 94.9275 99.793 \n", | |
" 12 97.67 99.2743 \n", | |
"2018 0 95.595 97.2608 \n", | |
" 1 100.076 98.7121 \n", | |
" 2 98.6902 99.8992 \n", | |
" 3 99.2301 99.7164 \n", | |
" 4 99.224 99.5861 \n", | |
" 5 99.2499 99.7656 \n", | |
" 6 98.7331 99.5777 \n", | |
" 7 99.6565 99.7252 \n", | |
" 8 98.2743 99.1604 \n", | |
" 9 98.8562 96.1329 \n", | |
" 10 99.5606 99.7489 \n", | |
" 11 98.2288 99.7048 \n", | |
" 12 100 98.7765 \n", | |
"2019 0 97.8269 96.7765 \n", | |
" 1 99.7506 97.2569 \n", | |
" 2 98.9708 99.6569 \n", | |
" 3 100.177 99.6452 \n", | |
" 4 100.163 98.803 \n", | |
" 5 97.05 98.4098 \n", | |
" 6 97.7328 98.0297 \n", | |
" 7 99.9769 97.9205 \n", | |
" 8 97.8198 98.1589 \n", | |
" 9 96.9072 99.3127 \n", | |
" 10 99.8138 99.6689 \n", | |
" 11 97.8567 97.9792 \n", | |
" 12 97.0057 95.1276 \n", | |
" 109 \n", | |
"2020 0 100 100 \n", | |
" 1 100.883 97.429 \n", | |
" 2 96.4299 99.3007 \n", | |
" 3 99.7323 98.742 \n", | |
" 4 100.028 98.0011 \n", | |
" 5 97.1371 98.9106 \n", | |
" 6 99.9127 99.3234 \n", | |
" 7 97.3591 99.1759 \n", | |
" 8 100.035 96.6457 \n", | |
"\n", | |
" percentage_tables_with_captions_titles percentage_fig_with_labels \\\n", | |
"year month \n", | |
"2017 0 97.5355 98.187336 \n", | |
" 1 97.1806 99.203716 \n", | |
" 2 98.1944 98.913043 \n", | |
" 3 99.1826 99.742434 \n", | |
" 4 99.6395 98.161471 \n", | |
" 5 99.4898 99.255583 \n", | |
" 6 98.942 99.438202 \n", | |
" 7 99.8549 99.424815 \n", | |
" 8 98.562 99.456348 \n", | |
" 9 99.9482 99.018838 \n", | |
" 10 99.8112 99.748111 \n", | |
" 11 99.793 99.907149 \n", | |
" 12 99.2743 99.015807 \n", | |
"2018 0 97.2515 97.714310 \n", | |
" 1 98.8636 99.717514 \n", | |
" 2 99.8992 99.715794 \n", | |
" 3 99.6759 99.636364 \n", | |
" 4 99.7413 98.711236 \n", | |
" 5 99.7656 99.089954 \n", | |
" 6 99.5777 99.876352 \n", | |
" 7 99.9656 99.825822 \n", | |
" 8 99.1138 99.573561 \n", | |
" 9 96.2963 99.739312 \n", | |
" 10 99.7489 99.901137 \n", | |
" 11 99.7417 99.184044 \n", | |
" 12 98.8706 99.350427 \n", | |
"2019 0 96.7765 98.843389 \n", | |
" 1 97.3192 99.368088 \n", | |
" 2 99.6998 99.711934 \n", | |
" 3 99.6452 97.790055 \n", | |
" 4 99.0207 99.447259 \n", | |
" 5 98.502 98.962440 \n", | |
" 6 97.6518 99.506505 \n", | |
" 7 98.036 99.208269 \n", | |
" 8 97.9409 98.321292 \n", | |
" 9 99.3531 98.977197 \n", | |
" 10 99.6069 99.585062 \n", | |
" 11 97.9792 98.611606 \n", | |
" 12 90.2729 99.248999 \n", | |
" 109 100.000000 \n", | |
"2020 0 100 100.000000 \n", | |
" 1 97.5058 96.988998 \n", | |
" 2 99.2639 99.219041 \n", | |
" 3 98.0996 99.143122 \n", | |
" 4 98.0011 98.698250 \n", | |
" 5 98.5305 99.444563 \n", | |
" 6 99.1925 99.099099 \n", | |
" 7 99.1759 99.223649 \n", | |
" 8 96.8553 99.207434 \n", | |
"\n", | |
" percentage_fig_with_captions_titles \n", | |
"year month \n", | |
"2017 0 98.109416 \n", | |
" 1 99.071002 \n", | |
" 2 99.104859 \n", | |
" 3 99.871217 \n", | |
" 4 98.481215 \n", | |
" 5 99.296940 \n", | |
" 6 98.735955 \n", | |
" 7 99.465900 \n", | |
" 8 99.392389 \n", | |
" 9 98.901099 \n", | |
" 10 99.280317 \n", | |
" 11 99.907149 \n", | |
" 12 98.956159 \n", | |
"2018 0 97.655161 \n", | |
" 1 99.576271 \n", | |
" 2 99.837596 \n", | |
" 3 99.603306 \n", | |
" 4 98.872332 \n", | |
" 5 98.984949 \n", | |
" 6 99.443586 \n", | |
" 7 99.825822 \n", | |
" 8 99.182658 \n", | |
" 9 99.478624 \n", | |
" 10 99.357390 \n", | |
" 11 99.546691 \n", | |
" 12 99.247863 \n", | |
"2019 0 98.937168 \n", | |
" 1 97.998947 \n", | |
" 2 99.465021 \n", | |
" 3 98.002550 \n", | |
" 4 99.009673 \n", | |
" 5 99.045445 \n", | |
" 6 99.439210 \n", | |
" 7 99.868045 \n", | |
" 8 99.192520 \n", | |
" 9 99.010731 \n", | |
" 10 99.525785 \n", | |
" 11 98.682805 \n", | |
" 12 99.182243 \n", | |
" 109 100.000000 \n", | |
"2020 0 100.000000 \n", | |
" 1 96.004632 \n", | |
" 2 99.181852 \n", | |
" 3 99.397869 \n", | |
" 4 97.524541 \n", | |
" 5 99.166845 \n", | |
" 6 98.621070 \n", | |
" 7 99.059421 \n", | |
" 8 99.043454 \n", | |
"\n", | |
"[49 rows x 21 columns]" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = (\n", | |
" df.groupby([\"year\", \"month\"]).sum()\n", | |
" .assign(\n", | |
" percentage_html_tables=lambda df: (df.table_wrap_table_count / df.table_wrap_count) * 100,\n", | |
" percentage_tables_with_labels=lambda df: (df.table_wrap_label_count / df.table_wrap_count) * 100,\n", | |
" percentage_tables_with_captions_titles=lambda df: (df.table_wrap_caption_title_count / df.table_wrap_count) * 100,\n", | |
" percentage_fig_with_labels=lambda df: (df.fig_label_count / df.fig_count) * 100,\n", | |
" percentage_fig_with_captions_titles=lambda df: (df.fig_caption_title_count / df.fig_count) * 100,\n", | |
" ) \n", | |
").fillna(\"\")\n", | |
"# Removeremos da análise os meses 9, 10 e 11 de 2020\n", | |
"df = df.drop(df.index[-3:])\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.to_csv(\"total_ativos_agrupados_2017-2020.csv\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>total_docs</th>\n", | |
" <td>82716.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>total_tables</th>\n", | |
" <td>173976.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>total_figs</th>\n", | |
" <td>205921.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>percentage_html_tables</th>\n", | |
" <td>95.054490</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>percentage_tables_with_labels</th>\n", | |
" <td>98.298616</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>percentage_tables_with_captions_titles</th>\n", | |
" <td>98.189980</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>percentage_fig_with_labels</th>\n", | |
" <td>98.899578</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>percentage_fig_with_captions_titles</th>\n", | |
" <td>98.796140</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0\n", | |
"total_docs 82716.000000\n", | |
"total_tables 173976.000000\n", | |
"total_figs 205921.000000\n", | |
"percentage_html_tables 95.054490\n", | |
"percentage_tables_with_labels 98.298616\n", | |
"percentage_tables_with_captions_titles 98.189980\n", | |
"percentage_fig_with_labels 98.899578\n", | |
"percentage_fig_with_captions_titles 98.796140" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"overall_percentages = {\n", | |
" \"total_docs\": df[\"total_docs\"].sum(),\n", | |
" \"total_tables\": df[\"table_wrap_count\"].sum(),\n", | |
" \"total_figs\": df[\"fig_count\"].sum(),\n", | |
" \"percentage_html_tables\": [(df[\"table_wrap_table_count\"].sum() / df[\"table_wrap_count\"].sum()) * 100],\n", | |
" \"percentage_tables_with_labels\": [(df[\"table_wrap_label_count\"].sum() / df[\"table_wrap_count\"].sum()) * 100],\n", | |
" \"percentage_tables_with_captions_titles\": [(df[\"table_wrap_caption_title_count\"].sum() / df[\"table_wrap_count\"].sum()) * 100],\n", | |
" \"percentage_fig_with_labels\": [(df[\"fig_label_count\"].sum() / df[\"fig_count\"].sum()) * 100],\n", | |
" \"percentage_fig_with_captions_titles\": [(df[\"fig_caption_title_count\"].sum() / df[\"fig_count\"].sum()) * 100],\n", | |
"}\n", | |
"df_overall_percentages = pd.DataFrame.from_dict(overall_percentages)\n", | |
"df_overall_percentages.T" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Na tabela acima mês de valor `0` corresponde aos XMLs que não possuem a indicação do mês de publicação. Isso pode ocorrer devido a não obrigatoriedade do metadado.\n", | |
"\n", | |
"Em uma análise preliminar sobre aspectos de acessibilidade em 82716 documentos publicados entre janeiro de 2017 e agosto de 2020, dentre 173.976 tabelas, 95,05% estão codificadas em HTML, 98,29% apresentam rótulo descritivo e 98,18% legenda. Ainda no mesmo conjunto de documentos, dentre 205.921 figuras, 98,89% apresentam rótulo descritivo e 98,79% legenda." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment