tblazina/code_example.py

## code_example.py

import pandas as pd   #Load Pandas module
import numpy as np    #Load Numpy module
import os        	             #Load operating system interface module
import netCDF4	#Load NetCDF4 module for opening chlorophyll-a data

trajectories=pd.read_pickle('C:\\Users\\blazinti\\Desktop\\Data\\Whales_project\\Plynlimon_netcdf_files\\Trajectories\\rainfall_filtered_trajectories')   # This  opens the filtered trajectory data

## The following block of code opens all of the individual chlorophyll-a gridded data sets and compiles them into one matrix


os.chdir('C:\\Users\\blazinti\\Desktop\\Data\\Whales_project\\Satellite_data\\ESA_GlobColour\\daily')  # This changes the working directory to where the chlorophyll-a files are located
files = [f for f in os.listdir('.') if os.path.isfile(f)]    # This creates a list of all the file names in string format
chla=np.empty([len(files),720,1440])		    #This creates an empty 3D matrix, with the dimensions of [time x latitude x longitude]
for i,k in zip(files,range(len(files))):     #This for loop opens each data set and adds the chlorophyll-a data to the empty chla matrix.
	data=netCDF4.Dataset(i)
	test=np.asarray(data.variables['CHL1_mean'])
	test[test<0]=np.nan
	chla[k]=test

# The following two lines creates latitude and longitude arrays for comparing chla grid cells with back trajectory points
lon=np.arange(-180,180,0.25)
lat=np.arange(90,-90,-0.25 )

# This defines a  rounding function to round to nearest quarter degree
def round4(x):
	return round(x*4)/4

trajectories.lat=trajectories.lat.apply(round4)  # This rounds all the trajectory latitudes to the nearest quarter degree
trajectories.lon=trajectories.lon.apply(round4) # this rounds all the trajectory longitudes to the nearest quarter degree

#The following three lines creates a date-array with all of the dates that cover the Plynlimon rain water chemistry data set
start=pd.datetime(2007,03,7)
end=pd.datetime(2009,03,31,21)
rng=pd.date_range(start,end,freq='1D')

#This creates an array that has the size which is as large as the number of trajectory points (~13,000,000) which will be filled below by the for loop.
chla_fill=np.zeros(len(trajectories))

#This initializes the counter for the while loop below to 0
count=0

# This loop goes through every trajectory point and compares if the trajectory points altitude is below the boundary layer height, if so then it checks at what time along the...
# trajectory that the point is (i.e. is it 24 hours back in time, 48 hours back in time,  ect.), and fills in the chla_fill array with the chlorophyll-a value at the trajectory points..
# latitude and longitude

for i in range(len(trajectories)):
	while (pd.to_datetime(trajectories.ix[count]['date'])==rng[i]):
		if  ((trajectories.ix[count]['z']<trajectories.ix[count]['BLH']) &(chla[i,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()>0)):
			if trajectories.ix[count]['time']>=-24:
				chla_fill[count]=chla[i,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()
				count+=1
			elif ((trajectories.ix[count]['time']<-24)&(trajectories.ix[count]['time']>=-48)):
				chla_fill[count]=chla[i-1,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()
				count+=1
			elif (trajectories.ix[count]['time']<-48):
				chla_fill[count]=chla[i-2,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()
				count+=1
		else:
			chla_fill[count]=np.nan
			count+=1

## gistfile1.txt

import pandas as pd   #Load Pandas module
import numpy as np    #Load Numpy module
import os        	             #Load operating system interface module
import netCDF4	#Load NetCDF4 module for opening chlorophyll-a data
trajectories=pd.read_pickle('C:\\Users\\blazinti\\Desktop\\Data\\Whales_project\\Plynlimon_netcdf_files\\Trajectories\\rainfall_filtered_trajectories')   # This  opens the filtered trajectory data
## The following block of code opens all of the individual chlorophyll-a gridded data sets and compiles them into one matrix
os.chdir('C:\\Users\\blazinti\\Desktop\\Data\\Whales_project\\Satellite_data\\ESA_GlobColour\\daily')  # This changes the working directory to where the chlorophyll-a files are located
files = [f for f in os.listdir('.') if os.path.isfile(f)]    # This creates a list of all the file names in string format
chla=np.empty([len(files),720,1440])		    #This creates an empty 3D matrix, with the dimensions of [time x latitude x longitude]
for i,k in zip(files,range(len(files))):     #This for loop opens each data set and adds the chlorophyll-a data to the empty chla matrix.
	data=netCDF4.Dataset(i)
	test=np.asarray(data.variables['CHL1_mean'])
	test[test<0]=np.nan
	chla[k]=test
# The following two lines creates latitude and longitude arrays for comparing chla grid cells with back trajectory points
lon=np.arange(-180,180,0.25)
lat=np.arange(90,-90,-0.25 )
# This defines a  rounding function to round to nearest quarter degree
def round4(x):
	return round(x*4)/4
trajectories.lat=trajectories.lat.apply(round4)  # This rounds all the trajectory latitudes to the nearest quarter degree
trajectories.lon=trajectories.lon.apply(round4) # this rounds all the trajectory longitudes to the nearest quarter degree
#The following three lines creates a date-array with all of the dates that cover the Plynlimon rain water chemistry data set
start=pd.datetime(2007,03,7)
end=pd.datetime(2009,03,31,21)
rng=pd.date_range(start,end,freq='1D')
#This creates an array that has the size which is as large as the number of trajectory points (~13,000,000) which will be filled below by the for loop.
chla_fill=np.zeros(len(trajectories))
#This initializes the counter for the while loop below to 0
count=0
# This loop goes through every trajectory point and compares if the trajectory points altitude is below the boundary layer height, if so then it checks at what time along the...
# trajectory that the point is (i.e. is it 24 hours back in time, 48 hours back in time,  ect.), and fills in the chla_fill array with the chlorophyll-a value at the trajectory points..
# latitude and longitude

for i in range(len(trajectories)):
	while (pd.to_datetime(trajectories.ix[count]['date'])==rng[i]):
		if  ((trajectories.ix[count]['z']<trajectories.ix[count]['BLH']) &(chla[i,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()>0)):
			if trajectories.ix[count]['time']>=-24:
				chla_fill[count]=chla[i,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()
				count+=1
			elif ((trajectories.ix[count]['time']<-24)&(trajectories.ix[count]['time']>=-48)):
				chla_fill[count]=chla[i-1,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()
				count+=1
			elif (trajectories.ix[count]['time']<-48):
				chla_fill[count]=chla[i-2,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()
				count+=1
		else:
			chla_fill[count]=np.nan
			count+=1

	import pandas as pd #Load Pandas module
	import numpy as np #Load Numpy module
	import os #Load operating system interface module
	import netCDF4 #Load NetCDF4 module for opening chlorophyll-a data

	trajectories=pd.read_pickle('C:\\Users\\blazinti\\Desktop\\Data\\Whales_project\\Plynlimon_netcdf_files\\Trajectories\\rainfall_filtered_trajectories') # This opens the filtered trajectory data

	## The following block of code opens all of the individual chlorophyll-a gridded data sets and compiles them into one matrix


	os.chdir('C:\\Users\\blazinti\\Desktop\\Data\\Whales_project\\Satellite_data\\ESA_GlobColour\\daily') # This changes the working directory to where the chlorophyll-a files are located
	files = [f for f in os.listdir('.') if os.path.isfile(f)] # This creates a list of all the file names in string format
	chla=np.empty([len(files),720,1440]) #This creates an empty 3D matrix, with the dimensions of [time x latitude x longitude]
	for i,k in zip(files,range(len(files))): #This for loop opens each data set and adds the chlorophyll-a data to the empty chla matrix.
	data=netCDF4.Dataset(i)
	test=np.asarray(data.variables['CHL1_mean'])
	test[test<0]=np.nan
	chla[k]=test

	# The following two lines creates latitude and longitude arrays for comparing chla grid cells with back trajectory points
	lon=np.arange(-180,180,0.25)
	lat=np.arange(90,-90,-0.25 )

	# This defines a rounding function to round to nearest quarter degree
	def round4(x):
	return round(x*4)/4

	trajectories.lat=trajectories.lat.apply(round4) # This rounds all the trajectory latitudes to the nearest quarter degree
	trajectories.lon=trajectories.lon.apply(round4) # this rounds all the trajectory longitudes to the nearest quarter degree

	#The following three lines creates a date-array with all of the dates that cover the Plynlimon rain water chemistry data set
	start=pd.datetime(2007,03,7)
	end=pd.datetime(2009,03,31,21)
	rng=pd.date_range(start,end,freq='1D')

	#This creates an array that has the size which is as large as the number of trajectory points (~13,000,000) which will be filled below by the for loop.
	chla_fill=np.zeros(len(trajectories))

	#This initializes the counter for the while loop below to 0
	count=0

	# This loop goes through every trajectory point and compares if the trajectory points altitude is below the boundary layer height, if so then it checks at what time along the...
	# trajectory that the point is (i.e. is it 24 hours back in time, 48 hours back in time, ect.), and fills in the chla_fill array with the chlorophyll-a value at the trajectory points..
	# latitude and longitude

	for i in range(len(trajectories)):
	while (pd.to_datetime(trajectories.ix[count]['date'])==rng[i]):
	if ((trajectories.ix[count]['z']<trajectories.ix[count]['BLH']) &(chla[i,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()>0)):
	if trajectories.ix[count]['time']>=-24:
	chla_fill[count]=chla[i,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()
	count+=1
	elif ((trajectories.ix[count]['time']<-24)&(trajectories.ix[count]['time']>=-48)):
	chla_fill[count]=chla[i-1,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()
	count+=1
	elif (trajectories.ix[count]['time']<-48):
	chla_fill[count]=chla[i-2,np.where(lat==trajectories.ix[count]['lat']),np.where(lon==trajectories.ix[count]['lon'])].squeeze()
	count+=1
	else:
	chla_fill[count]=np.nan
	count+=1