Created
May 25, 2019 11:18
-
-
Save stpnk/adababc33e10b9d92e1dff0377cbccf2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"38 ms ± 1.86 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"df_suspect = pd.DataFrame({\"quantity\":[0,0,0]}, index = pd.Series(['X01', 'X03', 'X04'], name = 'id') )\n", | |
"df_work = pd.DataFrame({\"tr_id\":[1,1,1,1,2,2,3,3,3],\n", | |
" \"id\":[\"X03\",\"X02\",\"X03\",\"X03\",'X01','X01','X04','X04','X04'], \n", | |
" \"reason\":['P','P','Q','I',\"Q\",\"I\",\"I\",\"P\",\"Q\"], \n", | |
" \"status\":[\"BROCKEN\"]+5*[\"GOOD\"] +[\"TO_REPAIR\"] + [\"BROCKEN\"]+[\"GOOD\"] ,\n", | |
" \"case\":9*[\"\"], \n", | |
" \"quantity\":[-1,-1,2,2,-1,-1,1,2,-3,]})\n", | |
"\n", | |
"\n", | |
"rooles = {(\"Q\",\"GOOD\"):[(\"P\",\"BROCKEN\"),\n", | |
" (\"I\",\"TO_REPAIR\"),\n", | |
" ],\n", | |
" (\"P\",\"REPAIRED\"):[(\"Q\",\"LOST\"),\n", | |
" (\"I\",\"TO_REPAIR\"),\n", | |
" ],\n", | |
" }\n", | |
"\n", | |
"tr_ids = set(df_work[\"tr_id\"]) #вибираємо номера документів з декількома транзакціями\n", | |
"for tr_id in tr_ids:\n", | |
" df_transaction = df_work.query(\"tr_id==@tr_id\") # group_by?\n", | |
" for row in df_transaction.itertuples():\n", | |
" for roole, r_list in rooles.items():\n", | |
" if (row.reason, row.status) == roole:\n", | |
" df_transaction_one_id = df_transaction.query(\"id == @row.id\") # group_by?\n", | |
" sum_suspect =0\n", | |
" for r_roole in r_list: \n", | |
" for r_row in df_transaction_one_id.itertuples():\n", | |
" if (r_row.reason, r_row.status) == r_roole:\n", | |
" sum_suspect += r_row.quantity # group_by?\n", | |
" df_work.loc[r_row.Index, \"case\"] = str(roole)+\":\"+str(r_roole)\n", | |
" if sum_suspect:\n", | |
" df_suspect.loc[row.id,\"quantity\"] += sum_suspect\n", | |
"df_work = df_work.query(\"case != ''\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"37.5 ms ± 1.73 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"df_suspect = pd.DataFrame({\"quantity\":[0,0,0]}, index = pd.Series(['X01', 'X03', 'X04'], name = 'id') )\n", | |
"df_work = pd.DataFrame({\"tr_id\":[1,1,1,1,2,2,3,3,3],\n", | |
" \"id\":[\"X03\",\"X02\",\"X03\",\"X03\",'X01','X01','X04','X04','X04'], \n", | |
" \"reason\":['P','P','Q','I',\"Q\",\"I\",\"I\",\"P\",\"Q\"], \n", | |
" \"status\":[\"BROCKEN\"]+5*[\"GOOD\"] +[\"TO_REPAIR\"] + [\"BROCKEN\"]+[\"GOOD\"] ,\n", | |
" \"case\":9*[\"\"], \n", | |
" \"quantity\":[-1,-1,2,2,-1,-1,1,2,-3,]})\n", | |
"\n", | |
"def generate_case(g):\n", | |
" \"\"\"Функция для заполнения поля case\n", | |
" g - группа записей в виде датафрейма\n", | |
" \"\"\"\n", | |
" \n", | |
" # Я немного изменил словарь с правилами.\n", | |
" # Посчитал, что будет проще работать со\n", | |
" # строками в качестве ключей и значений, \n", | |
" # чем с кортежами.\n", | |
" rules = {\n", | |
" \"Q-GOOD\":[\n", | |
" \"P-BROKEN\",\n", | |
" \"I-TO_REPAIR\",\n", | |
" ],\n", | |
" \"P-REPAIRED\":[\n", | |
" \"Q-LOST\",\n", | |
" \"I-TO_REPAIR\",\n", | |
" ],\n", | |
" }\n", | |
" \n", | |
" # Предварительно заполняем столбец case\n", | |
" # значениями \"<reason>-<status>\".\n", | |
" g.case = g.reason.str.cat(g.status, sep='-')\n", | |
" \n", | |
" # Проверяем есть ли значения из case\n", | |
" # среди ключей в словаре правил.\n", | |
" # Если найдено хотя бы одно совпадение (.any()),\n", | |
" # то продолжаем работать с этой группой.\n", | |
" if g.case.isin(rules).any():\n", | |
" \n", | |
" # Мы знаем, что в группе записей для одного товара\n", | |
" # в пределах одного документа может быть только\n", | |
" # одна запись с хорошим состоянием.\n", | |
" # Получим значение.\n", | |
" good_condition = g[g.case.isin(rules)].case.values[0]\n", | |
" #good_condition = g.query('case in @rules').case.values[0]\n", | |
"\n", | |
" # Достаем из словаря правил список подозрительных\n", | |
" # значений для данного хорошего состояния.\n", | |
" suspicious_conditions = rules[good_condition]\n", | |
" \n", | |
" # Перезаполняем case.\n", | |
" # Если значение ячейки case есть в списке\n", | |
" # подозрительных проводок для данного ключа, то\n", | |
" # заполняем case - \"<good_condition>:<suspicious_condition>\",\n", | |
" # иначе записываем nan.\n", | |
" g.case = np.where(g.case.isin(suspicious_conditions),\n", | |
" good_condition + ':' + g.case,\n", | |
" np.nan)\n", | |
" \n", | |
" else:\n", | |
" g.case = np.nan\n", | |
" \n", | |
" return g\n", | |
"\n", | |
"\n", | |
"df_work = (df_work.groupby(['tr_id', 'id']) # Группируем по id документа и id товара. \n", | |
" \n", | |
" # Применяем функцию для заполнения case\n", | |
" # Будут заполнены только ячейки, удовлетворяющие правилам.\n", | |
" .apply(generate_case)\n", | |
" \n", | |
" # Выбрасываем записи с пустым полем case\n", | |
" .dropna(subset=['case']))\n", | |
"\n", | |
"# Берем из df_work столбцы 'id' и 'quantity'.\n", | |
"# Группируем по id товара и суммируем quantity\n", | |
"df_quantity_sum = (df_work.loc[:, ['id', 'quantity']]\n", | |
" .set_index('id')\n", | |
" .groupby('id')\n", | |
" .sum())\n", | |
"\n", | |
"\n", | |
"# Суммируем получившийся датафрейм с df_suspect,\n", | |
"# который создали ранее. \n", | |
"# Пустые значения меняем на 0.\n", | |
"# Конвертируем в int.\n", | |
"df_suspect = (df_suspect + df_quantity_sum).fillna(0).astype(int)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "pandas_venv", | |
"language": "python", | |
"name": "pandas_venv" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment