Skip to content

Instantly share code, notes, and snippets.

@andjc
Created April 13, 2024 12:46
Show Gist options
  • Save andjc/95e1da80160efa972c16cd7228408182 to your computer and use it in GitHub Desktop.
Save andjc/95e1da80160efa972c16cd7228408182 to your computer and use it in GitHub Desktop.
localised_dataframe_persian.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/andjc/95e1da80160efa972c16cd7228408182/localised_dataframe_persian.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8AjWb9wFfh2S"
},
"source": [
"# Localised display of Pandas dataframe: Persian example"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "PJPSEncjfh2T"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import datetime as datetime\n",
"import icu\n",
"from functools import partial"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UPaD-uHHfh2U"
},
"source": [
"Create a sample dataframe with 3 columns, containing:\n",
"\n",
"1. timezone-aware datetime objects\n",
"2. integers\n",
"3. floating point numbers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QJphnNX_fh2V",
"outputId": "8049d358-4df6-40a4-997b-538caf7f4064"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 10 entries, 0 to 9\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 date 10 non-null datetime64[ns, Asia/Tehran]\n",
" 1 A 10 non-null int64 \n",
" 2 B 10 non-null float64 \n",
"dtypes: datetime64[ns, Asia/Tehran](1), float64(1), int64(1)\n",
"memory usage: 372.0 bytes\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2020-02-15 00:00:00+03:30</td>\n",
" <td>81838</td>\n",
" <td>10.589499</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2020-03-03 00:00:00+03:30</td>\n",
" <td>253760</td>\n",
" <td>18.382767</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2021-03-17 00:00:00+03:30</td>\n",
" <td>201534</td>\n",
" <td>11.933844</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2021-04-16 00:00:00+04:30</td>\n",
" <td>213254</td>\n",
" <td>12.523553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2021-05-10 00:00:00+04:30</td>\n",
" <td>232608</td>\n",
" <td>14.694209</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2021-09-11 00:00:00+04:30</td>\n",
" <td>185136</td>\n",
" <td>11.841187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2021-10-30 00:00:00+03:30</td>\n",
" <td>200630</td>\n",
" <td>16.752790</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2022-06-04 00:00:00+04:30</td>\n",
" <td>260568</td>\n",
" <td>19.738399</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2022-08-24 00:00:00+04:30</td>\n",
" <td>144653</td>\n",
" <td>13.072700</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2022-10-14 00:00:00+03:30</td>\n",
" <td>191453</td>\n",
" <td>16.931486</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date A B\n",
"0 2020-02-15 00:00:00+03:30 81838 10.589499\n",
"1 2020-03-03 00:00:00+03:30 253760 18.382767\n",
"2 2021-03-17 00:00:00+03:30 201534 11.933844\n",
"3 2021-04-16 00:00:00+04:30 213254 12.523553\n",
"4 2021-05-10 00:00:00+04:30 232608 14.694209\n",
"5 2021-09-11 00:00:00+04:30 185136 11.841187\n",
"6 2021-10-30 00:00:00+03:30 200630 16.752790\n",
"7 2022-06-04 00:00:00+04:30 260568 19.738399\n",
"8 2022-08-24 00:00:00+04:30 144653 13.072700\n",
"9 2022-10-14 00:00:00+03:30 191453 16.931486"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({\n",
" 'date': np.random.choice(pd.date_range('2020-01-01', '2023-01-01'), 10),\n",
" 'A': np.random.randint(50000,300000,size=10),\n",
" 'B': 10 + np.random.rand(10,)*10\n",
"})\n",
"\n",
"df.sort_values(by='date', ignore_index=True, inplace = True)\n",
"\n",
"df['date'] = df['date'].dt.tz_localize('Asia/Tehran')\n",
"\n",
"df.info()\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "V-nz9lMyfh2V"
},
"source": [
"Set up helper functions to convert _ints_ and _floats_ to the required number system and convert between calendar systems."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u9jGDLAtfh2W"
},
"outputs": [],
"source": [
"def convert_number_system(num, langtag, prec=2):\n",
" loc = icu.Locale.forLanguageTag(langtag)\n",
" frmter = icu.LocalizedNumberFormatter(loc).precision(icu.Precision.minMaxFraction(prec,prec))\n",
" if isinstance(num, float):\n",
" return frmter.formatDouble(num)\n",
" return frmter.formatInt(num)\n",
"\n",
"def convert_calendar(date, source_langtag, target_langtag, source_timezone, target_timezone=None):\n",
" # Source calendar\n",
" stz = icu.TimeZone.createTimeZone(source_timezone)\n",
" source_locale = icu.Locale.forLanguageTag(source_langtag)\n",
" source_cal = icu.Calendar.createInstance(stz, source_locale)\n",
" # source_dateformat = icu.DateFormat.createDateInstance(icu.DateFormat.MEDIUM, source_locale)\n",
" # Target calendar\n",
" ttz = icu.TimeZone.createTimeZone(target_timezone) if target_timezone else stz\n",
" target_locale = icu.Locale.forLanguageTag(target_langtag)\n",
" target_cal = icu.Calendar.createInstance(ttz, target_locale)\n",
" target_dateformat = icu.DateFormat.createDateInstance(icu.DateFormat.MEDIUM, target_locale)\n",
" # Convert date to ICU Date object\n",
" internal_date = source_cal.set(date.year, date.month-1, date.day, date.hour, date.minute, date.second)\n",
" return target_dateformat.format(target_cal.setTime(internal_date.getTime()))\n",
"\n",
"fa_langtag = \"fa-IR-u-ca-persian-nu-arabext\"\n",
"\n",
"persian_numbers = partial(convert_number_system, langtag=fa_langtag)\n",
"\n",
"to_persian_calendar = partial(convert_calendar,\n",
" source_langtag = 'fa-IR-u-ca-gregory',\n",
" target_langtag = fa_langtag,\n",
" source_timezone = 'Asia/Tehran',\n",
" target_timezone = None\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0XKcy_PSfh2W"
},
"source": [
"Display localised dataframe:\n",
"\n",
"1. Relabeled columns using Persian labels instead of English column labels\n",
"2. Gregorian dates displayed using Persian (Iranian) Calendar\n",
"3. Localised numbers\n",
"4. Table cells displayed RTL\n",
"5. Adjust font"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "A5HW8ry8fh2X",
"outputId": "4d0739e4-930a-4ead-ce50-5c9fa8d06351"
},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_60494 td {\n",
" direction: rtl;\n",
"}\n",
"#T_60494 th {\n",
" direction: rtl;\n",
"}\n",
"#T_60494_row0_col0, #T_60494_row0_col1, #T_60494_row0_col2, #T_60494_row1_col0, #T_60494_row1_col1, #T_60494_row1_col2, #T_60494_row2_col0, #T_60494_row2_col1, #T_60494_row2_col2, #T_60494_row3_col0, #T_60494_row3_col1, #T_60494_row3_col2, #T_60494_row4_col0, #T_60494_row4_col1, #T_60494_row4_col2, #T_60494_row5_col0, #T_60494_row5_col1, #T_60494_row5_col2, #T_60494_row6_col0, #T_60494_row6_col1, #T_60494_row6_col2, #T_60494_row7_col0, #T_60494_row7_col1, #T_60494_row7_col2, #T_60494_row8_col0, #T_60494_row8_col1, #T_60494_row8_col2, #T_60494_row9_col0, #T_60494_row9_col1, #T_60494_row9_col2 {\n",
" font-family: Noto Naskh Arabic;\n",
" font-size: 16px;\n",
"}\n",
"</style>\n",
"<table id=\"T_60494\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_60494_level0_col0\" class=\"col_heading level0 col0\" >تاریخ</th>\n",
" <th id=\"T_60494_level0_col1\" class=\"col_heading level0 col1\" >ا</th>\n",
" <th id=\"T_60494_level0_col2\" class=\"col_heading level0 col2\" >ب</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
" <td id=\"T_60494_row0_col0\" class=\"data row0 col0\" >۲۶ بهمن ۱۳۹۸</td>\n",
" <td id=\"T_60494_row0_col1\" class=\"data row0 col1\" >۸۱٬۸۳۸٫۰۰</td>\n",
" <td id=\"T_60494_row0_col2\" class=\"data row0 col2\" >۱۰٫۵۹</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
" <td id=\"T_60494_row1_col0\" class=\"data row1 col0\" >۱۳ اسفند ۱۳۹۸</td>\n",
" <td id=\"T_60494_row1_col1\" class=\"data row1 col1\" >۲۵۳٬۷۶۰٫۰۰</td>\n",
" <td id=\"T_60494_row1_col2\" class=\"data row1 col2\" >۱۸٫۳۸</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
" <td id=\"T_60494_row2_col0\" class=\"data row2 col0\" >۲۷ اسفند ۱۳۹۹</td>\n",
" <td id=\"T_60494_row2_col1\" class=\"data row2 col1\" >۲۰۱٬۵۳۴٫۰۰</td>\n",
" <td id=\"T_60494_row2_col2\" class=\"data row2 col2\" >۱۱٫۹۳</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
" <td id=\"T_60494_row3_col0\" class=\"data row3 col0\" >۲۷ فروردین ۱۴۰۰</td>\n",
" <td id=\"T_60494_row3_col1\" class=\"data row3 col1\" >۲۱۳٬۲۵۴٫۰۰</td>\n",
" <td id=\"T_60494_row3_col2\" class=\"data row3 col2\" >۱۲٫۵۲</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
" <td id=\"T_60494_row4_col0\" class=\"data row4 col0\" >۲۰ اردیبهشت ۱۴۰۰</td>\n",
" <td id=\"T_60494_row4_col1\" class=\"data row4 col1\" >۲۳۲٬۶۰۸٫۰۰</td>\n",
" <td id=\"T_60494_row4_col2\" class=\"data row4 col2\" >۱۴٫۶۹</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row5\" class=\"row_heading level0 row5\" >5</th>\n",
" <td id=\"T_60494_row5_col0\" class=\"data row5 col0\" >۲۰ شهریور ۱۴۰۰</td>\n",
" <td id=\"T_60494_row5_col1\" class=\"data row5 col1\" >۱۸۵٬۱۳۶٫۰۰</td>\n",
" <td id=\"T_60494_row5_col2\" class=\"data row5 col2\" >۱۱٫۸۴</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row6\" class=\"row_heading level0 row6\" >6</th>\n",
" <td id=\"T_60494_row6_col0\" class=\"data row6 col0\" >۸ آبان ۱۴۰۰</td>\n",
" <td id=\"T_60494_row6_col1\" class=\"data row6 col1\" >۲۰۰٬۶۳۰٫۰۰</td>\n",
" <td id=\"T_60494_row6_col2\" class=\"data row6 col2\" >۱۶٫۷۵</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row7\" class=\"row_heading level0 row7\" >7</th>\n",
" <td id=\"T_60494_row7_col0\" class=\"data row7 col0\" >۱۴ خرداد ۱۴۰۱</td>\n",
" <td id=\"T_60494_row7_col1\" class=\"data row7 col1\" >۲۶۰٬۵۶۸٫۰۰</td>\n",
" <td id=\"T_60494_row7_col2\" class=\"data row7 col2\" >۱۹٫۷۴</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row8\" class=\"row_heading level0 row8\" >8</th>\n",
" <td id=\"T_60494_row8_col0\" class=\"data row8 col0\" >۲ شهریور ۱۴۰۱</td>\n",
" <td id=\"T_60494_row8_col1\" class=\"data row8 col1\" >۱۴۴٬۶۵۳٫۰۰</td>\n",
" <td id=\"T_60494_row8_col2\" class=\"data row8 col2\" >۱۳٫۰۷</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_60494_level0_row9\" class=\"row_heading level0 row9\" >9</th>\n",
" <td id=\"T_60494_row9_col0\" class=\"data row9 col0\" >۲۲ مهر ۱۴۰۱</td>\n",
" <td id=\"T_60494_row9_col1\" class=\"data row9 col1\" >۱۹۱٬۴۵۳٫۰۰</td>\n",
" <td id=\"T_60494_row9_col2\" class=\"data row9 col2\" >۱۶٫۹۳</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x12096f3e0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(df.style.format(\n",
" {\"date\": to_persian_calendar, \"A\": persian_numbers, \"B\": persian_numbers }\n",
").set_table_styles(\n",
" [{\"selector\": \"td\", \"props\": \"direction: rtl;\"},\n",
" {\"selector\": \"th\", \"props\": \"direction: rtl;\"}]\n",
").relabel_index(\n",
" [\"تاریخ\", \"ا\", \"ب\"], axis=1\n",
").set_properties(**{'font-family': 'Noto Naskh Arabic','font-size': '16px'}))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
},
"colab": {
"provenance": [],
"include_colab_link": true
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@andjc
Copy link
Author

andjc commented Apr 13, 2024

improved display:

# Render the localised table with dir="rtl" set as an attribute on the
# <table> element, then apply the per-column formatters, the Persian
# column labels, and the font properties.
display(
    df.style
    .set_table_attributes('dir="rtl"')
    .format({"date": to_persian_calendar, "A": persian_numbers, "B": persian_numbers})
    .relabel_index(["تاریخ", "ا", "ب"], axis=1)
    .set_properties(**{"font-family": "Noto Naskh Arabic", "font-size": "16px"})
)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment