Skip to content

Instantly share code, notes, and snippets.

@jonchen727
Created October 28, 2019 22:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonchen727/0d5743f6fb133f82d68e51448c6ce726 to your computer and use it in GitHub Desktop.
Save jonchen727/0d5743f6fb133f82d68e51448c6ce726 to your computer and use it in GitHub Desktop.
import pandas as pd
import glob
import datetime
import sys
import os
import os.path
from os import listdir
from os.path import isfile, join
import re
import numpy as np
print("Looking for Old Transformation File")
if os.path.exists('Cleaned Data Starvation.csv'):
print("Old File Found")
print("Removing Old Transformation File")
os.remove("Cleaned Data Starvation.csv")
else:
print("Transformation File Does Not Exist")
pd.set_option('display.max_rows',1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
my_path = os.path.abspath('')
Analog = os.path.join(my_path, "Raw","Starvation Analog","*.csv").replace("\\","/")
Digital =os.path.join(my_path,"Raw","Starvation Digital","*.csv").replace("\\","/")
OM = os.path.join(my_path,"Raw","Starvation Operation Mode","*.csv").replace("\\","/")
print("Importing and Combining All Files in Folder...")
print(Analog)
print(Digital)
print(OM)
all_analog = glob.glob(Analog)
all_dig=glob.glob(Digital)
all_om=glob.glob(OM)
li1 = []
li2 = []
li3 = []
for filename in all_analog:
df1 = pd.read_csv(filename, index_col=False,low_memory=False)
li1.append(df1)
df1 = pd.concat(li1, axis=0, ignore_index=True)
for filename in all_om:
df2 = pd.read_csv(filename, index_col=False,low_memory=False)
li2.append(df2)
df2 = pd.concat(li2, axis=0, ignore_index=True)
for filename in all_dig:
df3 = pd.read_csv(filename, index_col=False,low_memory=False)
li3.append(df3)
df3 = pd.concat(li3, axis=0, ignore_index=True)
df1['Time'] = pd.to_datetime(df1['Time'].astype(str), format= "%Y %m %d %H:%M:%S")
df2['Time'] = pd.to_datetime(df2['Time'].astype(str), format= "%Y %m %d %H:%M:%S")
df3['Time'] = pd.to_datetime(df3['Time'].astype(str), format= "%Y %m %d %H:%M:%S")
df1.set_index('Time')
df2.set_index('Time')
df3.set_index('Time')
df_outer = pd.merge(df1,df2, on='Time', how='outer',sort=True)
df_outer = pd.merge(df_outer,df3, on='Time', how='outer',sort=True)
df_outer=df_outer.fillna(method='ffill')
df4=df_outer
print("Cleaning Analog, Backwash, and Operation Mode Data")
df4['[B] Wet Weather Step Description'] = np.where((df4['BBF2.WET_WEATHER.STEP']==3.0), "Filtering",
np.where((df4['BBF2.WET_WEATHER.STEP']==5.0), "Backwashing",
np.where((df4['BBF2.WET_WEATHER.STEP']==2.0),"Start of Filtering",
np.where((df4['BBF2.WET_WEATHER.STEP']==6.0), "End of Backwash",
np.where((df4['BBF2.WET_WEATHER.STEP']==4.0),"End of Filtering",
np.where((df4['BBF2.WET_WEATHER.STEP']==0),"Idle",None))))))
df4['[F] Primary Step Description'] = np.where((df4['BBF1.PRIMARY.STEP']==3.0), "Filtering",
np.where((df4['BBF1.PRIMARY.STEP']==5.0), "Backwashing",
np.where((df4['BBF1.PRIMARY.STEP']==2.0),"Start of Filtering",
np.where((df4['BBF1.PRIMARY.STEP']==6.0), "End of Backwash",
np.where((df4['BBF1.PRIMARY.STEP']==4.0),"End of Filtering",
np.where((df4['BBF1.PRIMARY.STEP']==0),"Idle",None))))))
df4['[F] Wet Weather Step Description'] = np.where((df4['BBF1.WET_WEATHER.STEP']==3.0), "Filtering",
np.where((df4['BBF1.WET_WEATHER.STEP']==5.0), "Backwashing",
np.where((df4['BBF1.WET_WEATHER.STEP']==2.0),"Start of Filtering",
np.where((df4['BBF1.WET_WEATHER.STEP']==6.0), "End of Backwash",
np.where((df4['BBF1.WET_WEATHER.STEP']==4.0),"End of Filtering",
np.where((df4['BBF1.WET_WEATHER.STEP']==0),"Idle",None))))))
df4['[B] Primary Step Description'] = np.where((df4['BBF2.PRIMARY.STEP']==3.0), "Filtering",
np.where((df4['BBF2.PRIMARY.STEP']==5.0), "Backwashing",
np.where((df4['BBF2.PRIMARY.STEP']==2.0),"Start of Filtering",
np.where((df4['BBF2.PRIMARY.STEP']==6.0), "End of Backwash",
np.where((df4['BBF2.PRIMARY.STEP']==4.0),"End of Filtering",
np.where((df4['BBF2.PRIMARY.STEP']==0),"Idle",None))))))
df4['[B] Backwash Step Description'] = np.where((df4['BBF2.BACKWASH.STEP']==0), "Not in BW",
np.where((df4['BBF2.BACKWASH.STEP']==2.0),"Step 1: Idle",
np.where((df4['BBF2.BACKWASH.STEP']==3.0), "Step 2: Drain",
np.where((df4['BBF2.BACKWASH.STEP']==4.0),"Step 3: Idle",
np.where((df4['BBF2.BACKWASH.STEP']==5.0),"Step 4: Air",
np.where((df4['BBF2.BACKWASH.STEP']==6.0),"Step 5: Idle",
np.where((df4['BBF2.BACKWASH.STEP']==7.0),"Step 6: Drain",
np.where((df4['BBF2.BACKWASH.STEP']==8.0),"Step 7: Idle",
np.where((df4['BBF2.BACKWASH.STEP']==9.0),"Step 8: Drain",
np.where((df4['BBF2.BACKWASH.STEP']==10.0),"Step: 9 Idle",
np.where((df4['BBF2.BACKWASH.STEP']==11.0),"Step 10: Drain",
np.where((df4['BBF2.BACKWASH.STEP']==12.0),"Step 11: Idle",None))))))))))))
df4['[F] Backwash Step Description'] = np.where((df4['BBF1.BACKWASH.STEP']==0), "Not in BW",
np.where((df4['BBF1.BACKWASH.STEP']==2.0),"Step 1: Idle",
np.where((df4['BBF1.BACKWASH.STEP']==3.0), "Step 2: Drain",
np.where((df4['BBF1.BACKWASH.STEP']==4.0),"Step 3: Idle",
np.where((df4['BBF1.BACKWASH.STEP']==5.0),"Step 4: Air",
np.where((df4['BBF1.BACKWASH.STEP']==6.0),"Step 5: Idle",
np.where((df4['BBF1.BACKWASH.STEP']==7.0),"Step 6: Drain",
np.where((df4['BBF1.BACKWASH.STEP']==8.0),"Step 7: Idle",
np.where((df4['BBF1.BACKWASH.STEP']==9.0),"Step 8: Drain",
np.where((df4['BBF1.BACKWASH.STEP']==10.0),"Step: 9 Idle",
np.where((df4['BBF1.BACKWASH.STEP']==11.0),"Step 10: Drain",
np.where((df4['BBF1.BACKWASH.STEP']==12.0),"Step 11: Idle",None))))))))))))
df4['[B] Recirculation Step Description'] = np.where((df4['BBF2.RECIRC.STEP']==2.0),"Recirculating","Idle")
df4['[F] Recirculation Step Description'] = np.where((df4['BBF1.RECIRC.STEP']==2.0),"Recirculating","Idle")
df4 = df4.rename(columns={'SENSORS.INFLUENT.SST': 'newName1'})
df4.drop(([df4.columns[1],df4.columns[15],df4.columns[16],df4.columns[30]]),axis=1,inplace=True)
df4 = df4.rename(columns={df4.columns[0]: 'DateTime',
df4.columns[1] : "[LF] Influent ORP (mV)",
df4.columns[2] : "[F] Reactor DO (ppm)",
df4.columns[3] : "[F] Reactor pH",
df4.columns[4] : "[F] Reactor Temp (F)",
df4.columns[5] : "[F] Effulent TSS (ppm)",
df4.columns[6] : "[F] Reactor Pressure (psi)",
df4.columns[7] : "[F] Influent Flow (gpm)",
df4.columns[8] : "[B] Reactor DO (ppm)",
df4.columns[9] : "[B] Reactor pH",
df4.columns[10] : "[B] Reactor Temp (F)",
df4.columns[11] : "[B] Effulent TSS (ppm)",
df4.columns[12] : "[B] Reactor Pressure (psi)",
df4.columns[13] : "[B] Influent Flow (gpm)",
df4.columns[14] : "[Coag] Stock Solution Conc. (ppm)",
df4.columns[15] : "[Coag] Influent Final Conc. SP (ppm)",
df4.columns[16] : "[Coag] ",
df4.columns[17] : "[Poly] Stock Solution Conc. (ppm)",
df4.columns[18] : "[Poly] Influent Final Conc. SP (ppm)",
df4.columns[19] : "[Poly] ",
df4.columns[20] : "[HF] Pump Hz",
df4.columns[21] : "[LF] Pump Hz",
df4.columns[22] : "[Coag] Pump Flowrate (mL/min)",
df4.columns[23] : "[Poly] Pump Flowrate (mL/min)",
df4.columns[24] : "[HF] Influent TSS (ppm)",
df4.columns[25] : "[LF] Influent TSS (ppm)",
df4.columns[26] : "[LF] Influent Temperature (°F)",
df4.columns[27] : "[B] Effulent ORP (mV)",
df4.columns[28] : "[F] Reactor Linear Velocity (m/hr)",
df4.columns[29] : "[B] Reactor EBCT (min)",
df4.columns[30] : "[F] Primary Step Number",
df4.columns[31] : "[F] Backwash Step Number",
df4.columns[32] : "[F] Wet Weather Step Number",
df4.columns[33] : "[F] Recirculation Step Number",
df4.columns[34] : "[B] Backwash Step Number",
df4.columns[35] : "[B] Primary Step Number",
df4.columns[36] : "[B] Wet Weather Step Number",
df4.columns[37] : "[B] Recirculation Step Number",
df4.columns[58] : "[B] Aeration Valve"})
df4.drop((df4.columns[[38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57]]),axis=1,inplace=True)
df4.drop(([df4.columns[16],df4.columns[19]]),axis=1,inplace=True)
df4['Time Delta (min)'] = df4['DateTime'].shift(-1)-df4['DateTime']
df4['Time Delta (min)'] = df4['Time Delta (min)'].dt.total_seconds()/60
Date, Time = zip(*[(d.date(), d.time()) for d in df4['DateTime']])
df4 = df4.assign(Date=Date, Time=Time)
df4 = df4.rename(columns={df4.columns[0]: 'Date/Time'})
df4['[B] Starvation Status'] = np.where((df4['[B] Aeration Valve']==0)&(df4['[B] Recirculation Step Description']=='Recirculating')&(df4['[B] Recirculation Step Description'].shift(-1)!='Idle'),
'Anoxic',np.where((df4['[B] Aeration Valve']==1)&(df4['[B] Recirculation Step Description']=='Recirculating'),'Air','N/A'))
df4['[B] Starvation Status'] = np.where(df4['[B] Wet Weather Step Description']=='Idle',df4['[B] Starvation Status'],'N/A' )
df4['[B] Starvation Status+1'] = np.where(((df4['[B] Starvation Status'].shift(-1)!=df4['[B] Starvation Status'])&(df4['[B] Recirculation Step Description'].shift(-2)=='Recirculating')),True,None)
df4['[B] Anoxic Time (min)'] = np.where((df4['[B] Starvation Status']=='Anoxic'),df4['Time Delta (min)'],None)
df4['[B] Air Time (min)'] = np.where((df4['[B] Starvation Status']=='Air'),df4['Time Delta (min)'],None)
df4['[B] Instant OPR (ppm/hr)'] = np.where(df4['[B] Starvation Status']=='Anoxic',((df4['[B] Reactor DO (ppm)'].shift(-1)-df4['[B] Reactor DO (ppm)'])/df4['Time Delta (min)'])*60,None)
##df4['[B] Reactor DO (ppm)']-df4['[B] Reactor DO (ppm)'].shift(-1) (df4['[B] Recirculation Step Description']=='Recirculating')&
df4['[B] Instant OPR (ppm/hr)'] = np.where(df4['[B] Instant OPR (ppm/hr)']<0,df4['[B] Instant OPR (ppm/hr)']*-1,None)
df4['[B] Instant OPR (ppm/hr)'] = np.where(df4['[B] Reactor DO (ppm)']>0.5,df4['[B] Instant OPR (ppm/hr)'],None)
df4['[B] Instant ORP Air (mV/hr)'] = np.where(df4['[B] Starvation Status']=='Air',((df4['[B] Effulent ORP (mV)'].shift(-1)-df4['[B] Effulent ORP (mV)'])/df4['Time Delta (min)'])*60,None)
df4['[B] Instant ORP Air (mV/hr)'] = np.where(df4['[B] Instant ORP Air (mV/hr)']>0,df4['[B] Instant ORP Air (mV/hr)'],None)
df4['[B] Instant ORP No Air (mV/hr)'] = np.where(df4['[B] Starvation Status']=='Anoxic',((df4['[B] Effulent ORP (mV)'].shift(-1)-df4['[B] Effulent ORP (mV)'])/df4['Time Delta (min)'])*60,None)
df4['[B] Instant ORP No Air (mV/hr)'] = np.where(df4['[B] Instant ORP No Air (mV/hr)']<0,df4['[B] Instant ORP No Air (mV/hr)'],None)
df5 = pd.read_csv("Test Number Dates.csv", index_col=False,low_memory=False)
df5['Date/Time'] = pd.to_datetime(df5['Date/Time'].astype(str), format= "%m/%d/%Y %H:%M")
df4.set_index('Date/Time')
df5.set_index('Date/Time')
df4 = pd.merge_asof(df4,df5, on='Date/Time', direction='backward')
df4['Test Number']=df4['Test Number'].fillna(method='ffill')
print("Saving Data to CSV")
df4.to_csv("Cleaned Data Starvation.csv")
print(str(len(df4.index))+" Rows Exported")
input("Press ENTER to Exit")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment