Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save VasilijKolomiets/4a773f10ae0bb0e6f561b7abf0fa5f2e to your computer and use it in GitHub Desktop.
Save VasilijKolomiets/4a773f10ae0bb0e6f561b7abf0fa5f2e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"df_suspect = pd.DataFrame({\"quantity\":[0,0,0]}, index = pd.Series(['X01', 'X03', 'X04'], name = 'id') )\n",
"df_work = pd.DataFrame({\"tr_id\":[1,1,1,1,2,2,3,3,3],\n",
" \"id\":[\"X03\",\"X02\",\"X03\",\"X03\",'X01','X01','X04','X04','X04'], \n",
" \"reason\":['P','P','Q','I',\"Q\",\"I\",\"I\",\"P\",\"Q\"], \n",
" \"status\":[\"BROCKEN\"]+5*[\"GOOD\"] +[\"TO_REPAIR\"] + [\"BROCKEN\"]+[\"GOOD\"] ,\n",
" \"case\":9*[\"\"], \n",
" \"quantity\":[-1,-1,2,2,-1,-1,1,2,-3,]})\n",
"\n",
"\n",
"rooles = {(\"Q\",\"GOOD\"):[(\"P\",\"BROCKEN\"),\n",
" (\"I\",\"TO_REPAIR\"),\n",
" ],\n",
" (\"P\",\"REPAIRED\"):[(\"Q\",\"LOST\"),\n",
" (\"I\",\"TO_REPAIR\"),\n",
" ],\n",
" }\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_work"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"144\n"
]
}
],
"source": [
"n=4\n",
"for i in range(n):\n",
" df_work = df_work.append(df_work, ignore_index=True)\n",
"print(len(df_work))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_work.head(30)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"792 ms ± 33.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"tr_ids = set(df_work[\"tr_id\"]) #вибираємо номера документів з декількома транзакціями\n",
"for tr_id in tr_ids:\n",
" df_transaction = df_work.query(\"tr_id==@tr_id\") # group_by?\n",
"# df_transaction = df_work[df_work[\"tr_id\"] == tr_id]\n",
" for row in df_transaction.itertuples():\n",
" for roole, r_list in rooles.items():\n",
" if (row.reason, row.status) == roole:\n",
" df_transaction_one_id = df_transaction.query(\"id == @row.id\") # group_by?\n",
" sum_suspect =0\n",
" for r_roole in r_list: \n",
" for r_row in df_transaction_one_id.itertuples():\n",
" if (r_row.reason, r_row.status) == r_roole:\n",
" sum_suspect += r_row.quantity # group_by?\n",
" df_work.loc[r_row.Index, \"case\"] = str(roole)+\":\"+str(r_roole)\n",
" if sum_suspect:\n",
" df_suspect.loc[row.id,\"quantity\"] += sum_suspect\n",
"df_rez = df_work.query(\"case != ''\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_rez"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"df_suspect = pd.DataFrame({\"quantity\":[0,0,0]}, index = pd.Series(['X01', 'X03', 'X04'], name = 'id') )\n",
"df_work = pd.DataFrame({\"tr_id\":[1,1,1,1,2,2,3,3,3],\n",
" \"id\":[\"X03\",\"X02\",\"X03\",\"X03\",'X01','X01','X04','X04','X04'], \n",
" \"reason\":['P','P','Q','I',\"Q\",\"I\",\"I\",\"P\",\"Q\"], \n",
" \"status\":[\"BROKEN\"]+5*[\"GOOD\"] +[\"TO_REPAIR\"] + [\"BROKEN\"]+[\"GOOD\"] ,\n",
" \"case\":9*[\"\"], \n",
" \"quantity\":[-1,-1,2,2,-1,-1,1,2,-3,]})"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"144\n"
]
}
],
"source": [
"n=4\n",
"for i in range(n):\n",
" df_work = df_work.append(df_work, ignore_index=True)\n",
"print(len(df_work))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_work"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"39.8 ms ± 4.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"def generate_case(g):\n",
" \"\"\"Функция для заполнения поля case\n",
" g - группа записей в виде датафрейма\n",
" \"\"\"\n",
"\n",
" # Я немного изменил словарь с правилами.\n",
" # Посчитал, что будет проще работать со\n",
" # строками в качестве ключей и значений, \n",
" # чем с кортежами.\n",
" rules = {\n",
" \"Q-GOOD\":[\n",
" \"P-BROKEN\",\n",
" \"I-TO_REPAIR\",\n",
" ],\n",
" \"P-REPAIRED\":[\n",
" \"Q-LOST\",\n",
" \"I-TO_REPAIR\",\n",
" ],\n",
" }\n",
"\n",
" # Предварительно заполняем столбец case\n",
" # значениями \"<reason>-<status>\".\n",
" g.case = g.reason.str.cat(g.status, sep='-')\n",
"\n",
" # Проверяем есть ли значения из case\n",
" # среди ключей в словаре правил.\n",
" # Если найдено хотя бы одно совпадение (.any()),\n",
" # то продолжаем работать с этой группой.\n",
" if g.case.isin(rules).any():\n",
"\n",
" # Мы знаем, что в группе записей для одного товара\n",
" # в пределах одного документа может быть только\n",
" # одна запись с хорошим состоянием.\n",
" # Получим значение.\n",
" good_condition = g[g.case.isin(rules)].case.values[0]\n",
" #good_condition = g.query('case in @rules').case.values[0]\n",
"\n",
" # Достаем из словаря правил список подозрительных\n",
" # значений для данного хорошего состояния.\n",
" suspicious_conditions = rules[good_condition]\n",
"\n",
" # Перезаполняем case.\n",
" # Если значение ячейки case есть в списке\n",
" # подозрительных проводок для данного ключа, то\n",
" # заполняем case - \"<good_condition>:<suspicious_condition>\",\n",
" # иначе записываем nan.\n",
" g.case = np.where(g.case.isin(suspicious_conditions),\n",
" good_condition + ':' + g.case,\n",
" np.nan)\n",
"\n",
" else:\n",
" g.case = np.nan\n",
"\n",
" return g\n",
"\n",
"df_rez = (df_work.groupby(['tr_id', 'id']) # Группируем по id документа и id товара. \n",
" \n",
" # Применяем функцию для заполнения case\n",
" # Будут заполнены только ячейки, удовлетворяющие правилам.\n",
" .apply(generate_case)\n",
" \n",
" # Выбрасываем записи с пустым полем case\n",
" .dropna(subset=['case']))\n",
"\n",
"# Берем из df_work столбцы 'id' и 'quantity'.\n",
"# Группируем по id товара и суммируем quantity\n",
"df_quantity_sum = (df_rez.loc[:, ['id', 'quantity']]\n",
" .set_index('id')\n",
" .groupby('id')\n",
" .sum())\n",
"\n",
"# Суммируем получившийся датафрейм с df_suspect,\n",
"# который создали ранее. \n",
"# Пустые значения меняем на 0.\n",
"# Конвертируем в int.\n",
"df_suspect_rez = (df_suspect + df_quantity_sum).fillna(0).astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_rez"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"help(df_work.append)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_suspect_rez"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment