Skip to content

Instantly share code, notes, and snippets.

@tcvieira
Created June 24, 2021 22:13
Show Gist options
  • Save tcvieira/681c036ff4f169473a9651b6d3cbbd58 to your computer and use it in GitHub Desktop.
Save tcvieira/681c036ff4f169473a9651b6d3cbbd58 to your computer and use it in GitHub Desktop.
add_datepart.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "add_datepart.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyOxr6Ca+pNh38aSyMs94dr7",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/tcvieira/681c036ff4f169473a9651b6d3cbbd58/add_datepart.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "OSCTNCXyFLLY"
},
"source": [
"# add_datepart\n",
"\n",
"Função extraída da lib fastai"
]
},
{
"cell_type": "code",
"metadata": {
"id": "PK2IxkmQFEdY"
},
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import re\n",
"\n",
"def ifnone(a,b):\n",
" \"`a` if `a` is not None, otherwise `b`.\"\n",
" return b if a is None else a\n",
"\n",
"def make_date(df, date_field):\n",
" \"Make sure `df[date_field]` is of the right date type.\"\n",
" field_dtype = df[date_field].dtype\n",
" if isinstance(field_dtype, pd.core.dtypes.dtypes.DatetimeTZDtype):\n",
" field_dtype = np.datetime64\n",
" if not np.issubdtype(field_dtype, np.datetime64):\n",
" df[date_field] = pd.to_datetime(df[date_field], infer_datetime_format=True)\n",
"\n",
"def add_datepart(df, field_name, prefix=None, drop=True, time=False):\n",
" \"Helper function that adds columns relevant to a date in the column `field_name` of `df`.\"\n",
" make_date(df, field_name)\n",
" field = df[field_name]\n",
" prefix = ifnone(prefix, re.sub('[Dd]ate$', '', field_name))\n",
" attr = ['Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear', 'Is_month_end', 'Is_month_start',\n",
" 'Is_quarter_end', 'Is_quarter_start', 'Is_year_end', 'Is_year_start']\n",
" if time: attr = attr + ['Hour', 'Minute', 'Second']\n",
" # Pandas removed `dt.week` in v1.1.10\n",
" week = field.dt.isocalendar().week.astype(field.dt.day.dtype) if hasattr(field.dt, 'isocalendar') else field.dt.week\n",
" for n in attr: df[prefix + n] = getattr(field.dt, n.lower()) if n != 'Week' else week\n",
" mask = ~field.isna()\n",
" df[prefix + 'Elapsed'] = np.where(mask,field.values.astype(np.int64) // 10 ** 9,np.nan)\n",
" if drop: df.drop(field_name, axis=1, inplace=True)\n",
" return df"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "UtI_fNkqFHkp"
},
"source": [
"df = pd.DataFrame({'date': ['2019-12-04', None, '2019-11-15', '2019-10-24']})\n",
"df = add_datepart(df, 'date')\n",
"df.head()"
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment