Created
September 4, 2019 18:09
-
-
Save andy-pi/e7c17337a4cabb37e1efb59818744622 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import glob | |
csv_files = glob.glob("/*.CSV") # get list of files in directory | |
for csv_file in csv_files: # loop over all files in directory | |
data = pd.read_csv(csv_file) # load csv file | |
data = data[data["Fee"] != 0] # remove all rows where fee=0 (aka non purchases) | |
data = data[data["Gross"] > 0] # remove all rows where income < 0 (i.e. -ve) | |
data = data[['Subject', 'Gross']] # choose only subject and gross income columns | |
data['Subject']= data['Subject'].replace({'Old name': 'New name'}) # Manually rename duplicates with different names | |
data = data.groupby('Subject')['Gross'].agg(['sum','count']) # group by subject and aggregate the rows | |
data.index.names = ['Item Name'] # rename index to 'Item Name' | |
# get month from filename and rename columns | |
month = csv_file.split(".")[0].split("/")[-1] | |
month_count = month + ' Quantity Sold' | |
month_value = month + ' Sales in £' | |
data.rename(columns={'sum': month_value, 'count': month_count}, inplace=True) | |
# merge the each months into one dataframe | |
if csv_file == csv_files[0]: | |
fulldf = data | |
else: | |
fulldf = fulldf.merge(data, on='Item Name', how='outer') | |
fulldf.fillna(0, inplace=True) # replace all NaN with 0 | |
fulldf.drop(['Personal Item 1','Personal Item 2']) # remove personal sales with a specific name | |
quantity_cols = [col for col in fulldf.columns if 'Quantity' in col] # Choose quantity col labels | |
fulldf[quantity_cols] = fulldf[quantity_cols].astype(int) # convert quantity cols to type(int) | |
quantity_df = fulldf[quantity_cols] # select quantity cols only | |
quantity_df['Mean Quantity'] = quantity_df.mean(axis=1) # get mean over the time period | |
quantity_df.loc['Total']= quantity_df.sum() # get total of all rows | |
quantity_df.to_csv(r'/eBay Report by Quantity.csv') # Export dataframe back to CSV |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment