Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mikegerber/72e57c847486163f46de94a71987ef5c to your computer and use it in GitHub Desktop.
Save mikegerber/72e57c847486163f46de94a71987ef5c to your computer and use it in GitHub Desktop.
digisam - How many pages in the year 1666?.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": false
},
"id": "d8aa77b1",
"cell_type": "code",
"source": "import pandas as pd\n\n# Remove the row limit so that Series/DataFrame output is never truncated.\npd.options.display.max_rows = None",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "95a0d080",
"cell_type": "code",
"source": "# Load the digisam mods_info table; the first CSV column becomes the index.\n#\n# low_memory=False makes pandas parse each column in a single pass, so columns\n# with mixed content get one consistent dtype instead of being inferred chunk\n# by chunk (which is what triggers the DtypeWarning on this file).\nmods_info_df = pd.read_csv(\n    \"/home/mike/devel/qurator-data/digisam/mods_info/mods_info_df_all.2023-01-23.csv\",\n    index_col=0,\n    low_memory=False,\n)",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "09f08dc8",
"cell_type": "code",
"source": "# Select works from 1666 (created or published).\n#\n# The date columns are cast to strings and matched by prefix, so any value that\n# begins with the year counts. na=False treats a missing date as \"no match\"\n# instead of propagating <NA> into the boolean mask used for row selection.\nYEAR = \"1666\"\n\ncreated_in_year = (\n    mods_info_df[\"originInfo-production0_dateCreated\"]\n    .astype(\"string\")\n    .str.startswith(YEAR, na=False)\n)\nissued_in_year = (\n    mods_info_df[\"originInfo-publication0_dateIssued\"]\n    .astype(\"string\")\n    .str.startswith(YEAR, na=False)\n)\n\nmods_info_df = mods_info_df[created_in_year | issued_in_year]",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "1bd72d63",
"cell_type": "code",
"source": "# Approximate the page count of each work by the number of files in its\n# PRESENTATION METS file group. This is slightly too high, because the group\n# also contains non-page images such as the color checker, but it is close\n# enough for this purpose.\n#\n# (Only the first few of the >300 values are shown here.)\nmods_info_df.loc[:, \"mets_fileSec_fileGrp-PRESENTATION-count\"].head()",
"execution_count": 4,
"outputs": [
{
"data": {
"text/plain": "PPN607617047 33.0\nPPN848333268 20.0\nPPN873829409 17.0\nPPN664526918 12.0\nPPN71528990X 21.0\nName: mets_fileSec_fileGrp-PRESENTATION-count, dtype: float64"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "6e4cc65e",
"cell_type": "code",
"source": "# Total over all selected works: add up the per-work PRESENTATION file counts.\ntotal_pages = mods_info_df[\"mets_fileSec_fileGrp-PRESENTATION-count\"].sum()\ntotal_pages",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": "55309.0"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
]
}
],
"metadata": {
"gist": {
"id": "",
"data": {
"description": "digisam - How many pages in the year 1666?.ipynb",
"public": false
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.9.13",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
},
"hide_input": false
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment