Skip to content

Instantly share code, notes, and snippets.

@jonchen727
Created October 28, 2019 22:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonchen727/ab46aabb415bc48ab59178a60bbbaba9 to your computer and use it in GitHub Desktop.
Save jonchen727/ab46aabb415bc48ab59178a60bbbaba9 to your computer and use it in GitHub Desktop.
import pandas as pd
import glob
import datetime
import sys
import os
import os.path
from os import listdir
from os.path import isfile, join
import glob
import re
import numpy as np
print("Looking for Old Transformation File")
if os.path.exists('Cleaned Data.csv'):
print("Old File Found")
print("Removing Old Transformation File")
os.remove("Cleaned Data.csv")
else:
print("Transformation File Does Not Exist")
pd.set_option('display.max_rows',1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
my_path = os.path.abspath('')
Analog = os.path.join(my_path, "Raw","Full Analog","*.csv").replace("\\","/")
Backwash =os.path.join(my_path,"Raw","Full Backwash","*.csv").replace("\\","/")
OM = os.path.join(my_path,"Raw","Full Operation Mode","*.csv").replace("\\","/")
print("Importing and Combining All Files in Folder...")
print(Analog)
print(Backwash)
print(OM)
all_analog = glob.glob(Analog)
all_backwash=glob.glob(Backwash)
all_om=glob.glob(OM)
li1 = []
li2 = []
li3 = []
for filename in all_analog:
df1 = pd.read_csv(filename, index_col=False,low_memory=False)
li1.append(df1)
df1 = pd.concat(li1, axis=0, ignore_index=True)
for filename in all_backwash:
df2 = pd.read_csv(filename, index_col=False,low_memory=False)
li2.append(df2)
df2 = pd.concat(li2, axis=0, ignore_index=True)
for filename in all_om:
df3 = pd.read_csv(filename, index_col=False,low_memory=False)
li3.append(df3)
df3 = pd.concat(li3, axis=0, ignore_index=True)
df1['Time'] = pd.to_datetime(df1['Time'].astype(str), format= "%Y %m %d %H:%M:%S")
df2['Time'] = pd.to_datetime(df2['Time'].astype(str), format= "%Y %m %d %H:%M:%S")
df3['Time'] = pd.to_datetime(df3['Time'].astype(str), format= "%Y %m %d %H:%M:%S")
df1.set_index('Time')
df2.set_index('Time')
df3.set_index('Time')
df_outer = pd.merge(df1,df2, on='Time', how='outer',sort=True)
df_outer = pd.merge(df_outer,df3, on='Time',how='outer',sort=True)
df_outer=df_outer.fillna(method='ffill')
df4=df_outer
print("Cleaning Analog, Backwash, and Operation Mode Data")
df4['[B] Wet Weather Step Description'] = np.where((df4['BBF2.WET_WEATHER.STEP']==3.0), "Filtering",
np.where((df4['BBF2.WET_WEATHER.STEP']==5.0), "Backwashing",
np.where((df4['BBF2.WET_WEATHER.STEP']==2.0),"Start of Filtering",
np.where((df4['BBF2.WET_WEATHER.STEP']==6.0), "End of Backwash",
np.where((df4['BBF2.WET_WEATHER.STEP']==4.0),"End of Filtering",
np.where((df4['BBF2.WET_WEATHER.STEP']==0),"Idle",None))))))
df4['[F] Primary Step Description'] = np.where((df4['BBF1.PRIMARY.STEP']==3.0), "Filtering",
np.where((df4['BBF1.PRIMARY.STEP']==5.0), "Backwashing",
np.where((df4['BBF1.PRIMARY.STEP']==2.0),"Start of Filtering",
np.where((df4['BBF1.PRIMARY.STEP']==6.0), "End of Backwash",
np.where((df4['BBF1.PRIMARY.STEP']==4.0),"End of Filtering",
np.where((df4['BBF1.PRIMARY.STEP']==0),"Idle",None))))))
df4['[F] Wet Weather Step Description'] = np.where((df4['BBF1.WET_WEATHER.STEP']==3.0), "Filtering",
np.where((df4['BBF1.WET_WEATHER.STEP']==5.0), "Backwashing",
np.where((df4['BBF1.WET_WEATHER.STEP']==2.0),"Start of Filtering",
np.where((df4['BBF1.WET_WEATHER.STEP']==6.0), "End of Backwash",
np.where((df4['BBF1.WET_WEATHER.STEP']==4.0),"End of Filtering",
np.where((df4['BBF1.WET_WEATHER.STEP']==0),"Idle",None))))))
df4['[B] Primary Step Description'] = np.where((df4['BBF2.PRIMARY.STEP']==3.0), "Filtering",
np.where((df4['BBF2.PRIMARY.STEP']==5.0), "Backwashing",
np.where((df4['BBF2.PRIMARY.STEP']==2.0),"Start of Filtering",
np.where((df4['BBF2.PRIMARY.STEP']==6.0), "End of Backwash",
np.where((df4['BBF2.PRIMARY.STEP']==4.0),"End of Filtering",
np.where((df4['BBF2.PRIMARY.STEP']==0),"Idle",None))))))
df4['[B] Backwash Step Description'] = np.where((df4['BBF2.BACKWASH.STEP']==0), "Not in BW",
np.where((df4['BBF2.BACKWASH.STEP']==2.0),"Step 1: Idle",
np.where((df4['BBF2.BACKWASH.STEP']==3.0), "Step 2: Drain",
np.where((df4['BBF2.BACKWASH.STEP']==4.0),"Step 3: Idle",
np.where((df4['BBF2.BACKWASH.STEP']==5.0),"Step 4: Air",
np.where((df4['BBF2.BACKWASH.STEP']==6.0),"Step 5: Idle",
np.where((df4['BBF2.BACKWASH.STEP']==7.0),"Step 6: Drain",
np.where((df4['BBF2.BACKWASH.STEP']==8.0),"Step 7: Idle",
np.where((df4['BBF2.BACKWASH.STEP']==9.0),"Step 8: Drain",
np.where((df4['BBF2.BACKWASH.STEP']==10.0),"Step: 9 Idle",
np.where((df4['BBF2.BACKWASH.STEP']==11.0),"Step 10: Drain",
np.where((df4['BBF2.BACKWASH.STEP']==12.0),"Step 11: Idle",None))))))))))))
df4['[F] Backwash Step Description'] = np.where((df4['BBF1.BACKWASH.STEP']==0), "Not in BW",
np.where((df4['BBF1.BACKWASH.STEP']==2.0),"Step 1: Idle",
np.where((df4['BBF1.BACKWASH.STEP']==3.0), "Step 2: Drain",
np.where((df4['BBF1.BACKWASH.STEP']==4.0),"Step 3: Idle",
np.where((df4['BBF1.BACKWASH.STEP']==5.0),"Step 4: Air",
np.where((df4['BBF1.BACKWASH.STEP']==6.0),"Step 5: Idle",
np.where((df4['BBF1.BACKWASH.STEP']==7.0),"Step 6: Drain",
np.where((df4['BBF1.BACKWASH.STEP']==8.0),"Step 7: Idle",
np.where((df4['BBF1.BACKWASH.STEP']==9.0),"Step 8: Drain",
np.where((df4['BBF1.BACKWASH.STEP']==10.0),"Step: 9 Idle",
np.where((df4['BBF1.BACKWASH.STEP']==11.0),"Step 10: Drain",
np.where((df4['BBF1.BACKWASH.STEP']==12.0),"Step 11: Idle",None))))))))))))
df4['[B] Recirculation Step Description'] = np.where((df4['BBF2.RECIRC.STEP']==2.0),"Recirculating","Idle")
df4['[F] Recirculation Step Description'] = np.where((df4['BBF1.RECIRC.STEP']==2.0),"Recirculating","Idle")
df4 = df4.rename(columns={'SENSORS.INFLUENT.SST': 'newName1'})
df4.drop(([df4.columns[1],df4.columns[15],df4.columns[16],df4.columns[30]]),axis=1,inplace=True)
df4 = df4.rename(columns={df4.columns[0]: 'DateTime',
df4.columns[1] : "[LF] Influent ORP (mV)",
df4.columns[2] : "[F] Reactor DO (ppm)",
df4.columns[3] : "[F] Reactor pH",
df4.columns[4] : "[F] Reactor Temp (F)",
df4.columns[5] : "[F] Effulent TSS (ppm)",
df4.columns[6] : "[F] Reactor Pressure (psi)",
df4.columns[7] : "[F] Influent Flow (gpm)",
df4.columns[8] : "[B] Reactor DO (ppm)",
df4.columns[9] : "[B] Reactor pH",
df4.columns[10] : "[B] Reactor Temp (F)",
df4.columns[11] : "[B] Effulent TSS (ppm)",
df4.columns[12] : "[B] Reactor Pressure (psi)",
df4.columns[13] : "[B] Influent Flow (gpm)",
df4.columns[14] : "[Coag] Stock Solution Conc. (ppm)",
df4.columns[15] : "[Coag] Influent Final Conc. SP (ppm)",
df4.columns[16] : "[Coag] ",
df4.columns[17] : "[Poly] Stock Solution Conc. (ppm)",
df4.columns[18] : "[Poly] Influent Final Conc. SP (ppm)",
df4.columns[19] : "[Poly] ",
df4.columns[20] : "[HF] Pump Hz",
df4.columns[21] : "[LF] Pump Hz",
df4.columns[22] : "[Coag] Pump Flowrate (mL/min)",
df4.columns[23] : "[Poly] Pump Flowrate (mL/min)",
df4.columns[24] : "[HF] Influent TSS (ppm)",
df4.columns[25] : "[LF] Influent TSS (ppm)",
df4.columns[26] : "[LF] Influent Temperature (F)",
df4.columns[27] : "[B] Effulent ORP (mV)",
df4.columns[28] : "[F] Reactor Linear Velocity (m/hr)",
df4.columns[29] : "[B] Reactor EBCT (min)",
df4.columns[30] : "[B+F] Backwash Flowrate (gpm)",
df4.columns[31] : "[F] Primary Step Number",
df4.columns[32] : "[F] Backwash Step Number",
df4.columns[33] : "[F] Wet Weather Step Number",
df4.columns[34] : "[F] Recirculation Step Number",
df4.columns[35] : "[B] Backwash Step Number",
df4.columns[36] : "[B] Primary Step Number",
df4.columns[37] : "[B] Wet Weather Step Number",
df4.columns[38] : "[B] Recirculation Step Number"})
df4.drop(([df4.columns[16],df4.columns[19]]),axis=1,inplace=True)
df4['Time Delta (min)'] = df4['DateTime'].shift(-1)-df4['DateTime']
df4['Time Delta (min)'] = df4['Time Delta (min)'].dt.total_seconds()/60
avgfin= (df4['[F] Influent Flow (gpm)']+df4['[F] Influent Flow (gpm)'].shift(-1))/2
avgbin= (df4['[B] Influent Flow (gpm)']+df4['[B] Influent Flow (gpm)'].shift(-1))/2
avgbw = (df4['[B+F] Backwash Flowrate (gpm)']+df4['[B+F] Backwash Flowrate (gpm)'].shift(-1))/2
df4['[F] Reactor Pressure during Filtration (psi)'] = np.where((df4[df4.columns[38]]=="Filtering"),df4[df4.columns[6]], None)
df4['[F] Reactor Linear Velocity during Filtration (m/hr)']=np.where((df4[df4.columns[38]]=="Filtering"),df4[df4.columns[26]],None)
df4['[F] Influent TSS during Filtration (ppm)'] = np.where((df4[df4.columns[38]]=="Filtering")&(df4[df4.columns[22]]<2000)&(df4[df4.columns[22]]>0),df4[df4.columns[22]], np.where((df4[df4.columns[31]]=="Filtering")&(df4[df4.columns[23]]<2000)&(df4[df4.columns[23]]>0),df4[df4.columns[23]],None))
df4['[F] Effulent TSS during Filtration (ppm)'] = np.where((df4[df4.columns[38]]=="Filtering")&(df4[df4.columns[5]]<2000)&(df4[df4.columns[5]]>0),df4[df4.columns[5]],None)
df4['[F] TSS Removal during Filtration (%)'] = np.where((df4[df4.columns[48]]>0)&(df4[df4.columns[49]]>0),1-(df4[df4.columns[49]]/df4[df4.columns[48]]),None)
df4['[F] Filtration Volume (gal)'] = np.where((df4[df4.columns[38]]=="Filtering"),avgfin*df4['Time Delta (min)'], None)
df4['[F] Backwash Volume (gal)'] = np.where((df4[df4.columns[42]].str.contains('Drain', case=True,regex=True)),avgbw*df4['Time Delta (min)'],None)
df4['[F] Idle Duration (min)'] = np.where((df4[df4.columns[42]].str.contains('Idle', case=True,regex=True)),df4['Time Delta (min)'],None)
df4['[F] Drain Duration (min)'] = np.where((df4[df4.columns[42]].str.contains('Drain', case=True,regex=True)),df4['Time Delta (min)'],None)
df4['[F] Air Duration (min)'] = np.where((df4[df4.columns[42]].str.contains('Air', case=True,regex=True)),df4['Time Delta (min)'],None)
df4['[F] Backwash Duration (min)'] = np.where((df4[df4.columns[42]]!='Not in BW')&(df4[df4.columns[42]].notnull()),df4['Time Delta (min)'],None)
df4['[B] Influent ORP during Filtration (mV)'] = np.where((df4[df4.columns[37]]=="Filtering"),df4[df4.columns[1]], None)
df4['[B] Effulent ORP during Filtration (mV)'] = np.where((df4[df4.columns[37]]=="Filtering"),df4[df4.columns[25]], None)
df4['[B] Reactor Pressure during Filtration (psi)'] = np.where((df4[df4.columns[37]]=="Filtering"),df4[df4.columns[12]], None)
df4['[B] Reactor Linear Velocity during Filtration (m/hr)']=np.where((df4[df4.columns[37]]=="Filtering"),0.1945525292*df4[df4.columns[13]],None)
df4['[B] Influent TSS during Filtration (ppm)'] = np.where((df4[df4.columns[37]]=="Filtering")&(df4[df4.columns[23]]<2000)&(df4[df4.columns[23]]>0),df4[df4.columns[23]], np.where((df4[df4.columns[34]]=="Filtering")&(df4[df4.columns[22]]<2000)&(df4[df4.columns[22]]>0),df4[df4.columns[22]],None))
df4['[B] Effulent TSS during Filtration (ppm)'] = np.where((df4[df4.columns[37]]=="Filtering")&(df4[df4.columns[11]]<2000)&(df4[df4.columns[11]]>0),df4[df4.columns[11]],None)
df4['[B] TSS Removal during Filtration (%)'] = np.where((df4[df4.columns[62]]>0)&(df4[df4.columns[61]]>0),1-(df4[df4.columns[62]]/df4[df4.columns[61]]),None)
df4['[B] Filtration Volume (gal)'] = np.where((df4[df4.columns[37]]=="Filtering"),avgbin*df4['Time Delta (min)'], None)
df4['[B] Filtration Duration (gal)'] = np.where((df4[df4.columns[37]]=="Filtering"),df4['Time Delta (min)'], None)
df4['[B] Backwash Volume (gal)'] = np.where((df4[df4.columns[41]].str.contains('Drain', case=True,regex=True)),avgbw*df4['Time Delta (min)'],None)
df4['[B] Idle Duration (min)'] = np.where((df4[df4.columns[41]].str.contains('Idle', case=True,regex=True)),df4['Time Delta (min)'],None)
df4['[B] Drain Duration (min)'] = np.where((df4[df4.columns[41]].str.contains('Drain', case=True,regex=True)),df4['Time Delta (min)'],None)
df4['[B] Air Duration (min)'] = np.where((df4[df4.columns[41]].str.contains('Air', case=True,regex=True)),df4['Time Delta (min)'],None)
df4['[B] Backwash Duration (min)'] = np.where((df4[df4.columns[41]]!='Not in BW')&(df4[df4.columns[41]].notnull()),df4['Time Delta (min)'],None)
df4['[B] Empty Bed Contact Time (min)']=np.where((df4[df4.columns[37]]=="Filtering"),df4['[B] Reactor EBCT (min)'],None)
df4['[F] Empty Bed Contact Time (min)']=np.where((df4[df4.columns[38]]=="Filtering"),616.84364/df4['[F] Influent Flow (gpm)'],None)
df4['[F] Filtration Duration (min)'] = np.where((df4[df4.columns[38]]=="Filtering"),df4['Time Delta (min)'], None)
df4['Tandum Backwash'] = np.where((df4[df4.columns[37]]=="Backwashing") & (df4[df4.columns[38]]=='Backwashing'),True,None)
df4['[F] New Cycle'] = np.where((df4[df4.columns[38]]=='Start of Filtering')|(df4[df4.columns[39]]=='Start of Filtering'),True,None)
df4['[B] New Cycle'] = np.where((df4[df4.columns[37]]=='Start of Filtering')|(df4[df4.columns[40]]=='Start of Filtering'),True,None)
df4['[F] Backwash Count'] = np.where(((df4[df4.columns[38]]=='Backwashing')&(df4[df4.columns[38]].shift(-1)!='Backwashing'))|((df4[df4.columns[39]]=='Backwashing')&(df4[df4.columns[39]].shift(-1)!='Backwashing')),True,None)
df4['[B] Backwash Count'] = np.where(((df4[df4.columns[37]]=='Backwashing')&(df4[df4.columns[37]].shift(-1)!='Backwashing'))|((df4[df4.columns[40]]=='Backwashing')&(df4[df4.columns[40]].shift(-1)!='Backwashing')),True,None)
Date, Time = zip(*[(d.date(), d.time()) for d in df4['DateTime']])
df4 = df4.assign(Date=Date, Time=Time)
df4 = df4.rename(columns={df4.columns[0]: 'Date/Time'})
df5 = pd.read_csv("Test Number Dates.csv", index_col=False,low_memory=False)
df5['Date/Time'] = pd.to_datetime(df5['Date/Time'].astype(str), format= "%m/%d/%Y %H:%M")
df4.set_index('Date/Time')
df5.set_index('Date/Time')
df4 = pd.merge_asof(df4,df5, on='Date/Time', direction='backward')
df4['Test Number']=df4['Test Number'].fillna(method='ffill')
print("Saving Data to CSV")
df4.to_csv("Cleaned Data.csv")
print(str(len(df4.index))+" Rows Exported")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment