Created
April 13, 2024 12:46
-
-
Save andjc/95e1da80160efa972c16cd7228408182 to your computer and use it in GitHub Desktop.
localised_dataframe_persian.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/andjc/95e1da80160efa972c16cd7228408182/localised_dataframe_persian.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "8AjWb9wFfh2S" | |
}, | |
"source": [ | |
"# Localised display of Pandas dataframe: Persian example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "PJPSEncjfh2T" | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import datetime as datetime\n", | |
"import icu\n", | |
"from functools import partial" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "UPaD-uHHfh2U" | |
}, | |
"source": [ | |
"Create a sample dataframe with 3 columns, with\n", | |
"\n", | |
"1. timezone aware datetime objects\n", | |
"2. integers\n", | |
"3. floating point numbers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "QJphnNX_fh2V", | |
"outputId": "8049d358-4df6-40a4-997b-538caf7f4064" | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 10 entries, 0 to 9\n", | |
"Data columns (total 3 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 date 10 non-null datetime64[ns, Asia/Tehran]\n", | |
" 1 A 10 non-null int64 \n", | |
" 2 B 10 non-null float64 \n", | |
"dtypes: datetime64[ns, Asia/Tehran](1), float64(1), int64(1)\n", | |
"memory usage: 372.0 bytes\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2020-02-15 00:00:00+03:30</td>\n", | |
" <td>81838</td>\n", | |
" <td>10.589499</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2020-03-03 00:00:00+03:30</td>\n", | |
" <td>253760</td>\n", | |
" <td>18.382767</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2021-03-17 00:00:00+03:30</td>\n", | |
" <td>201534</td>\n", | |
" <td>11.933844</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2021-04-16 00:00:00+04:30</td>\n", | |
" <td>213254</td>\n", | |
" <td>12.523553</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2021-05-10 00:00:00+04:30</td>\n", | |
" <td>232608</td>\n", | |
" <td>14.694209</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2021-09-11 00:00:00+04:30</td>\n", | |
" <td>185136</td>\n", | |
" <td>11.841187</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2021-10-30 00:00:00+03:30</td>\n", | |
" <td>200630</td>\n", | |
" <td>16.752790</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>2022-06-04 00:00:00+04:30</td>\n", | |
" <td>260568</td>\n", | |
" <td>19.738399</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>2022-08-24 00:00:00+04:30</td>\n", | |
" <td>144653</td>\n", | |
" <td>13.072700</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>2022-10-14 00:00:00+03:30</td>\n", | |
" <td>191453</td>\n", | |
" <td>16.931486</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date A B\n", | |
"0 2020-02-15 00:00:00+03:30 81838 10.589499\n", | |
"1 2020-03-03 00:00:00+03:30 253760 18.382767\n", | |
"2 2021-03-17 00:00:00+03:30 201534 11.933844\n", | |
"3 2021-04-16 00:00:00+04:30 213254 12.523553\n", | |
"4 2021-05-10 00:00:00+04:30 232608 14.694209\n", | |
"5 2021-09-11 00:00:00+04:30 185136 11.841187\n", | |
"6 2021-10-30 00:00:00+03:30 200630 16.752790\n", | |
"7 2022-06-04 00:00:00+04:30 260568 19.738399\n", | |
"8 2022-08-24 00:00:00+04:30 144653 13.072700\n", | |
"9 2022-10-14 00:00:00+03:30 191453 16.931486" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Seeded generator so that Restart & Run All reproduces the same sample frame.\n", | |
"rng = np.random.default_rng(42)\n", | |
"\n", | |
"df = pd.DataFrame({\n", | |
"    'date': rng.choice(pd.date_range('2020-01-01', '2023-01-01'), 10),\n", | |
"    'A': rng.integers(50000, 300000, size=10),\n", | |
"    'B': 10 + rng.random(10) * 10\n", | |
"}).sort_values(by='date', ignore_index=True)\n", | |
"\n", | |
"# Midnight does not exist in Asia/Tehran on the days daylight saving time began\n", | |
"# (clocks jumped from 00:00 to 01:00). Shift such timestamps forward instead of\n", | |
"# letting tz_localize raise NonExistentTimeError for an unlucky random sample.\n", | |
"df['date'] = df['date'].dt.tz_localize('Asia/Tehran', nonexistent='shift_forward')\n", | |
"\n", | |
"df.info()\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "V-nz9lMyfh2V" | |
}, | |
"source": [ | |
"Set up helper functions to convert _ints_ and _floats_ to the required number system and convert between calendar systems." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "u9jGDLAtfh2W" | |
}, | |
"outputs": [], | |
"source": [ | |
"def convert_number_system(num, langtag, prec=2):\n", | |
"    \"\"\"Format a number using the locale and numbering system of `langtag`.\n", | |
"\n", | |
"    Whole numbers (Python or NumPy integers) are rendered without a\n", | |
"    fractional part; every other value is rendered with exactly `prec`\n", | |
"    fraction digits.\n", | |
"\n", | |
"    Args:\n", | |
"        num: The number to format.\n", | |
"        langtag: BCP 47 language tag selecting locale and numbering system.\n", | |
"        prec: Number of fraction digits used for non-integer values.\n", | |
"\n", | |
"    Returns:\n", | |
"        The formatted number.\n", | |
"    \"\"\"\n", | |
"    loc = icu.Locale.forLanguageTag(langtag)\n", | |
"    if isinstance(num, (int, np.integer)):\n", | |
"        # Integers used to inherit the fixed fraction precision and were shown\n", | |
"        # with a spurious fractional part (e.g. 81,838.00).\n", | |
"        frmter = icu.LocalizedNumberFormatter(loc).precision(icu.Precision.integer())\n", | |
"        return frmter.formatInt(int(num))\n", | |
"    frmter = icu.LocalizedNumberFormatter(loc).precision(icu.Precision.minMaxFraction(prec, prec))\n", | |
"    # float() also accepts NumPy floating scalars that are not `float` subclasses.\n", | |
"    return frmter.formatDouble(float(num))\n", | |
"\n", | |
"def convert_calendar(date, source_langtag, target_langtag, source_timezone, target_timezone=None):\n", | |
"    \"\"\"Format `date` as a medium-style date string in the target locale's calendar.\n", | |
"\n", | |
"    The year, month, day, hour, minute and second of `date` are read as\n", | |
"    wall-clock fields of the calendar selected by `source_langtag` in\n", | |
"    `source_timezone`. The resulting instant is then formatted with the\n", | |
"    calendar and locale selected by `target_langtag` in `target_timezone`\n", | |
"    (the source timezone is reused when `target_timezone` is not given).\n", | |
"    \"\"\"\n", | |
"    # Source calendar\n", | |
"    stz = icu.TimeZone.createTimeZone(source_timezone)\n", | |
"    source_locale = icu.Locale.forLanguageTag(source_langtag)\n", | |
"    source_cal = icu.Calendar.createInstance(stz, source_locale)\n", | |
"    # Target calendar\n", | |
"    ttz = icu.TimeZone.createTimeZone(target_timezone) if target_timezone else stz\n", | |
"    target_locale = icu.Locale.forLanguageTag(target_langtag)\n", | |
"    target_cal = icu.Calendar.createInstance(ttz, target_locale)\n", | |
"    target_dateformat = icu.DateFormat.createDateInstance(icu.DateFormat.MEDIUM, target_locale)\n", | |
"    # A new calendar holds the current time and set() only replaces the six\n", | |
"    # fields passed to it, so clear first to avoid keeping stale milliseconds.\n", | |
"    source_cal.clear()\n", | |
"    # ICU months are zero-based, hence month - 1.\n", | |
"    source_cal.set(date.year, date.month - 1, date.day, date.hour, date.minute, date.second)\n", | |
"    # Transfer the instant to the target calendar and format it there.\n", | |
"    target_cal.setTime(source_cal.getTime())\n", | |
"    return target_dateformat.format(target_cal)\n", | |
"\n", | |
"# Persian as used in Iran, with the Persian calendar (ca-persian) and\n", | |
"# extended Arabic-Indic digits (nu-arabext).\n", | |
"fa_langtag = \"fa-IR-u-ca-persian-nu-arabext\"\n", | |
"\n", | |
"# Number formatter bound to the Persian language tag.\n", | |
"persian_numbers = partial(convert_number_system, langtag=fa_langtag)\n", | |
"\n", | |
"# Date converter: Gregorian fields in Tehran time -> Persian calendar string.\n", | |
"to_persian_calendar = partial(\n", | |
"    convert_calendar,\n", | |
"    source_langtag='fa-IR-u-ca-gregory',\n", | |
"    target_langtag=fa_langtag,\n", | |
"    source_timezone='Asia/Tehran',\n", | |
"    target_timezone=None,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "0XKcy_PSfh2W" | |
}, | |
"source": [ | |
"Display localised dataframe:\n", | |
"\n", | |
"1. Relabeled columns using Persian labels instead of English column labels\n", | |
"2. Gregorian dates displayed using Persian (Iranian) Calendar\n", | |
"3. Localised numbers\n", | |
"4. Table cells displayed RTL\n", | |
"5. Adjust font" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "A5HW8ry8fh2X", | |
"outputId": "4d0739e4-930a-4ead-ce50-5c9fa8d06351" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<style type=\"text/css\">\n", | |
"#T_60494 td {\n", | |
" direction: rtl;\n", | |
"}\n", | |
"#T_60494 th {\n", | |
" direction: rtl;\n", | |
"}\n", | |
"#T_60494_row0_col0, #T_60494_row0_col1, #T_60494_row0_col2, #T_60494_row1_col0, #T_60494_row1_col1, #T_60494_row1_col2, #T_60494_row2_col0, #T_60494_row2_col1, #T_60494_row2_col2, #T_60494_row3_col0, #T_60494_row3_col1, #T_60494_row3_col2, #T_60494_row4_col0, #T_60494_row4_col1, #T_60494_row4_col2, #T_60494_row5_col0, #T_60494_row5_col1, #T_60494_row5_col2, #T_60494_row6_col0, #T_60494_row6_col1, #T_60494_row6_col2, #T_60494_row7_col0, #T_60494_row7_col1, #T_60494_row7_col2, #T_60494_row8_col0, #T_60494_row8_col1, #T_60494_row8_col2, #T_60494_row9_col0, #T_60494_row9_col1, #T_60494_row9_col2 {\n", | |
" font-family: Noto Naskh Arabic;\n", | |
" font-size: 16px;\n", | |
"}\n", | |
"</style>\n", | |
"<table id=\"T_60494\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th class=\"blank level0\" > </th>\n", | |
" <th id=\"T_60494_level0_col0\" class=\"col_heading level0 col0\" >تاریخ</th>\n", | |
" <th id=\"T_60494_level0_col1\" class=\"col_heading level0 col1\" >ا</th>\n", | |
" <th id=\"T_60494_level0_col2\" class=\"col_heading level0 col2\" >ب</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n", | |
" <td id=\"T_60494_row0_col0\" class=\"data row0 col0\" >۲۶ بهمن ۱۳۹۸</td>\n", | |
" <td id=\"T_60494_row0_col1\" class=\"data row0 col1\" >۸۱٬۸۳۸٫۰۰</td>\n", | |
" <td id=\"T_60494_row0_col2\" class=\"data row0 col2\" >۱۰٫۵۹</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n", | |
" <td id=\"T_60494_row1_col0\" class=\"data row1 col0\" >۱۳ اسفند ۱۳۹۸</td>\n", | |
" <td id=\"T_60494_row1_col1\" class=\"data row1 col1\" >۲۵۳٬۷۶۰٫۰۰</td>\n", | |
" <td id=\"T_60494_row1_col2\" class=\"data row1 col2\" >۱۸٫۳۸</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n", | |
" <td id=\"T_60494_row2_col0\" class=\"data row2 col0\" >۲۷ اسفند ۱۳۹۹</td>\n", | |
" <td id=\"T_60494_row2_col1\" class=\"data row2 col1\" >۲۰۱٬۵۳۴٫۰۰</td>\n", | |
" <td id=\"T_60494_row2_col2\" class=\"data row2 col2\" >۱۱٫۹۳</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n", | |
" <td id=\"T_60494_row3_col0\" class=\"data row3 col0\" >۲۷ فروردین ۱۴۰۰</td>\n", | |
" <td id=\"T_60494_row3_col1\" class=\"data row3 col1\" >۲۱۳٬۲۵۴٫۰۰</td>\n", | |
" <td id=\"T_60494_row3_col2\" class=\"data row3 col2\" >۱۲٫۵۲</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n", | |
" <td id=\"T_60494_row4_col0\" class=\"data row4 col0\" >۲۰ اردیبهشت ۱۴۰۰</td>\n", | |
" <td id=\"T_60494_row4_col1\" class=\"data row4 col1\" >۲۳۲٬۶۰۸٫۰۰</td>\n", | |
" <td id=\"T_60494_row4_col2\" class=\"data row4 col2\" >۱۴٫۶۹</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row5\" class=\"row_heading level0 row5\" >5</th>\n", | |
" <td id=\"T_60494_row5_col0\" class=\"data row5 col0\" >۲۰ شهریور ۱۴۰۰</td>\n", | |
" <td id=\"T_60494_row5_col1\" class=\"data row5 col1\" >۱۸۵٬۱۳۶٫۰۰</td>\n", | |
" <td id=\"T_60494_row5_col2\" class=\"data row5 col2\" >۱۱٫۸۴</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row6\" class=\"row_heading level0 row6\" >6</th>\n", | |
" <td id=\"T_60494_row6_col0\" class=\"data row6 col0\" >۸ آبان ۱۴۰۰</td>\n", | |
" <td id=\"T_60494_row6_col1\" class=\"data row6 col1\" >۲۰۰٬۶۳۰٫۰۰</td>\n", | |
" <td id=\"T_60494_row6_col2\" class=\"data row6 col2\" >۱۶٫۷۵</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row7\" class=\"row_heading level0 row7\" >7</th>\n", | |
" <td id=\"T_60494_row7_col0\" class=\"data row7 col0\" >۱۴ خرداد ۱۴۰۱</td>\n", | |
" <td id=\"T_60494_row7_col1\" class=\"data row7 col1\" >۲۶۰٬۵۶۸٫۰۰</td>\n", | |
" <td id=\"T_60494_row7_col2\" class=\"data row7 col2\" >۱۹٫۷۴</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row8\" class=\"row_heading level0 row8\" >8</th>\n", | |
" <td id=\"T_60494_row8_col0\" class=\"data row8 col0\" >۲ شهریور ۱۴۰۱</td>\n", | |
" <td id=\"T_60494_row8_col1\" class=\"data row8 col1\" >۱۴۴٬۶۵۳٫۰۰</td>\n", | |
" <td id=\"T_60494_row8_col2\" class=\"data row8 col2\" >۱۳٫۰۷</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th id=\"T_60494_level0_row9\" class=\"row_heading level0 row9\" >9</th>\n", | |
" <td id=\"T_60494_row9_col0\" class=\"data row9 col0\" >۲۲ مهر ۱۴۰۱</td>\n", | |
" <td id=\"T_60494_row9_col1\" class=\"data row9 col1\" >۱۹۱٬۴۵۳٫۰۰</td>\n", | |
" <td id=\"T_60494_row9_col2\" class=\"data row9 col2\" >۱۶٫۹۳</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n" | |
], | |
"text/plain": [ | |
"<pandas.io.formats.style.Styler at 0x12096f3e0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Column-wise formatters: Persian calendar dates and localised digits.\n", | |
"column_formatters = {\"date\": to_persian_calendar, \"A\": persian_numbers, \"B\": persian_numbers}\n", | |
"# Lay out both data cells and header cells right-to-left.\n", | |
"rtl_rules = [{\"selector\": tag, \"props\": \"direction: rtl;\"} for tag in (\"td\", \"th\")]\n", | |
"# Persian replacements for the English column labels.\n", | |
"persian_labels = [\"تاریخ\", \"ا\", \"ب\"]\n", | |
"font_props = {'font-family': 'Noto Naskh Arabic', 'font-size': '16px'}\n", | |
"\n", | |
"styled = df.style.format(column_formatters)\n", | |
"styled = styled.set_table_styles(rtl_rules)\n", | |
"styled = styled.relabel_index(persian_labels, axis=1)\n", | |
"styled = styled.set_properties(**font_props)\n", | |
"display(styled)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.12.0" | |
}, | |
"colab": { | |
"provenance": [], | |
"include_colab_link": true | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
improved display: