Last active
April 5, 2022 05:25
-
-
Save westandskif/6d7c82d21b3b5fd9a848767560871490 to your computer and use it in GitHub Desktop.
so-71581296/pandas-dataframe-how-to-calculate-the-difference-by-first-row-and-last-row-and
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from convtools import conversion as c | |
# tmp4.csv | |
""" | |
Category,CategoryName,time(s),Power,Vapor | |
1,A,1625448301,593233.36,3353.92 | |
1,A,1625449552,595156.24,3286.8 | |
1,A,1625450802,593833.36,3855.42 | |
2,B,1625452051,595233.37,3353.95 | |
2,B,1625453301,593535.86,3252.92 | |
2,B,1625454552,593473.36,3364.15 | |
3,C,1625455802,593754.32,3233.92 | |
3,C,1625457052,593153.46,3563.52 | |
3,C,1625458301,593854.56,3334.94 | |
4,D,1625459552,593345.75,3353.36 | |
4,D,1625460802,592313.24,3674.95 | |
4,D,1625460802,592313.24,3673.35 | |
1,A,1625463301,597313.23,3658.46 | |
1,A,1625464552,595913.68,3789.45 | |
""" | |
df = pd.read_csv("tmp4.csv") | |
col_to_idx = {column: index for index, column in enumerate(df.columns)} | |
converter = ( | |
c.chunk_by(c.item(col_to_idx["Category"])) | |
.aggregate( | |
{ | |
"Category": c.ReduceFuncs.First(c.this).item( | |
col_to_idx["Category"] | |
), | |
"CategoryName": c.ReduceFuncs.First(c.this).item( | |
col_to_idx["CategoryName"] | |
), | |
"TotalTime(s)": ( | |
c.ReduceFuncs.Last(c.this).item(col_to_idx["time(s)"]) | |
- c.ReduceFuncs.First(c.this).item(col_to_idx["time(s)"]) | |
), | |
"Power": c.ReduceFuncs.Sum(c.item(col_to_idx["Power"])), | |
"Vapor": c.ReduceFuncs.Sum(c.item(col_to_idx["Vapor"])), | |
} | |
) | |
.pipe( | |
c.group_by(c.item("Category"), c.item("CategoryName")).aggregate( | |
{ | |
"Category": c.item("Category"), | |
"CategoryName": c.item("CategoryName"), | |
"TotalTime(s)": c.ReduceFuncs.Sum(c.item("TotalTime(s)")), | |
"Power": c.ReduceFuncs.Sum(c.item("Power")), | |
"Vapor": c.ReduceFuncs.Sum(c.item("Vapor")), | |
} | |
) | |
.sort(key=lambda item: item["Category"]) | |
) | |
.gen_converter() | |
) | |
data = converter(df.to_records(index=False)) | |
# if you need to get back to a dataframe | |
df = pd.DataFrame.from_records(data) | |
""" | |
Category CategoryName TotalTime(s) Power Vapor | |
0 1 A 3752 2975449.87 17944.05 | |
1 2 B 2501 1782242.59 9971.02 | |
2 3 C 2499 1780762.34 10132.38 | |
3 4 D 1250 1777972.23 10701.66 | |
""" |
hi @blockhunts
I've updated the gist, please see above
I see, that's why my code didn't work. I didn't include which column index to be used as the sorting parameter.
Thank you very much for the help.
I hope this library will be bigger and spread further in the community 🥂
I'm glad to help! Thank you for your kind words! :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How do I define a sort function to sort the data by category