Skip to content

Instantly share code, notes, and snippets.

@nikhilgupta10
Last active August 29, 2015 14:07
Show Gist options
  • Save nikhilgupta10/35e0fc3be025066c1409 to your computer and use it in GitHub Desktop.
Save nikhilgupta10/35e0fc3be025066c1409 to your computer and use it in GitHub Desktop.
Python scripts used for Raw data Reprocessing
#Find and change calibration coefficient
#Instrument: IRT
import glob
import os
import tarfile
def replace_coefficient(line, coefficient='1.410000'):
    """Replace the calibration coefficient stored in the last field of a record.

    Records are comma separated. Only records with exactly 7 or 8 fields are
    changed: their last field becomes ``coefficient`` followed by a newline.

    Returns a ``(new_line, previous)`` tuple where ``previous`` is the old
    last field (including its line terminator), or ``None`` when the record
    was left untouched.
    """
    fields = line.split(',')
    if len(fields) not in (7, 8):
        return line, None
    previous = fields[-1]
    fields[-1] = coefficient + '\n'
    return ','.join(fields), previous


def recalibrate_irt_files(directory):
    """Rewrite the calibration coefficient in every ``*.icm`` file of ``directory``.

    The process changes into ``directory`` and prints each file name together
    with the coefficient it replaces.
    """
    # Resolve before chdir: joining a *relative* path onto the new working
    # directory made the glob below come back empty.
    directory = os.path.abspath(directory)
    os.chdir(directory)
    file_count = len([name for name in os.listdir('.') if os.path.isfile(name)])
    print('Number of files are: %d' % file_count)
    # The untar step was disabled in the original script; the *.icm files are
    # expected to be unpacked already.
    for filename in glob.glob(os.path.join(directory, '*.icm')):
        with open(filename, 'r') as handle:
            lines = handle.readlines()
        for index, line in enumerate(lines):
            lines[index], previous = replace_coefficient(line)
            if previous is not None:
                print(filename)
                print(previous)
        with open(filename, 'w') as handle:
            handle.writelines(lines)


if __name__ == '__main__':
    recalibrate_irt_files(
        raw_input('Enter the path: (example: /data/archive/sgp/sgpirtE38.00):'))
#Untar raw files and change the directions in the records
#Instrument: SONDE
import glob
import os
import tarfile
def fix_sonde_location(lines):
    """Correct the hemisphere of the longitude in a parsed sonde header, in place.

    Looks at the first 15 lines (fewer if the file is shorter). When the text
    ``28.03 E`` starts within the first 30 columns of a line, that line and
    the one after it are replaced by the corrected location line and an empty
    CRLF line.

    Returns a list of ``(index, column, previous_lines)`` tuples, one per
    replacement, where ``previous_lines`` is the slice that was overwritten.
    """
    changes = []
    # Bounded by the real length: indexing a fixed range(15) raised IndexError
    # on short files.
    for index in range(min(15, len(lines))):
        # end=36 keeps the match start within columns 0..29.
        column = lines[index].find('28.03 E', 0, 36)
        if column == -1:
            continue
        changes.append((index, column, lines[index:index + 2]))
        lines[index:index + 2] = ['Location : 39.09 N 28.03 W 15 m\r\n', '\r\n']
    return changes


def correct_sonde_files(directory):
    """Untar every archive in ``directory`` and fix the ``*.parsed`` headers.

    The process changes into ``directory``; archives are extracted there.
    """
    # Resolve before chdir so a relative path still matches in the globs below.
    directory = os.path.abspath(directory)
    os.chdir(directory)
    archives = glob.glob(os.path.join(directory, '*.tar'))
    # Count the archives themselves; the message used to report every file.
    print('Number of .tar files are: %d' % len(archives))
    for number, archive_path in enumerate(archives, 1):
        print('Untaring file:  %d' % number)
        # Close each archive as soon as it is extracted (previously only the
        # last handle was closed, and none existed when there were no archives).
        # NOTE(review): extractall trusts member paths; archives are assumed
        # to come from the data archive itself.
        with tarfile.open(archive_path) as archive:
            archive.extractall(directory)
    for filename in glob.glob(os.path.join(directory, '*.parsed')):
        print(filename)
        with open(filename, 'r') as handle:
            lines = handle.readlines()
        changes = fix_sonde_location(lines)
        if not changes:
            continue
        # Write once per file, after all replacements.
        with open(filename, 'w') as handle:
            handle.writelines(lines)
        for index, column, previous in changes:
            print(previous[0][column:column + 7])
            print(previous)
            print(lines[index][column:column + 7])
            print(lines[index:index + 2])


if __name__ == '__main__':
    correct_sonde_files(
        raw_input('Enter the path: (example: /data/archive/grw/grwsondeM1.00):'))
#Open raw files and based on the length of the record, find the particular value to edit with a formula
#Instrument: BRS
import glob
import os
from decimal import Decimal
#import tarfile
def rescale_brs_value(text, old_coefficient=114.74, new_coefficient=109.51):
    """Return ``text`` divided by the old coefficient and multiplied by the new one.

    The result is formatted with exactly five decimal places.
    """
    scaled = (float(text) / old_coefficient) * new_coefficient
    return str(Decimal(str(scaled)).quantize(Decimal('0.00001')))


def rescale_brs_record(line, old_coefficient=114.74, new_coefficient=109.51):
    """Apply the coefficient change to one comma-separated record.

    * 59-field records: the 5th field from the end is rescaled.
    * 79-field records: the 7th field from the end is overwritten with the
      new coefficient and the 25th field from the end is rescaled.
    * Any other record is returned unchanged.
    """
    fields = line.split(',')
    if len(fields) == 59:
        fields[-5] = rescale_brs_value(fields[-5], old_coefficient, new_coefficient)
    elif len(fields) == 79:
        fields[-7] = str(new_coefficient)
        fields[-25] = rescale_brs_value(fields[-25], old_coefficient, new_coefficient)
    return ','.join(fields)


def rescale_brs_files(directory):
    """Rescale every ``*.icm`` file found in ``directory`` (process chdirs there)."""
    # Resolve before chdir so a relative path still matches in the glob below.
    directory = os.path.abspath(directory)
    os.chdir(directory)
    file_count = len([name for name in os.listdir('.') if os.path.isfile(name)])
    print('Number of files are: %d' % file_count)
    # The untar step was disabled in the original script; files are expected
    # to be unpacked already.
    for filename in glob.glob(os.path.join(directory, '*.icm')):
        print(filename)
        with open(filename, 'r') as handle:
            lines = handle.readlines()
        lines = [rescale_brs_record(line) for line in lines]
        with open(filename, 'w') as handle:
            handle.writelines(lines)


if __name__ == '__main__':
    rescale_brs_files(
        raw_input('Enter the path: (example: /data/archive/sgp/sgpbrsC1.00):'))
#Untar raw files, open each and edit a certain incorrect record (unlike a normal .csv file, edit this record in header)
#Instrument: TSI
import glob
import os
import tarfile
def fix_tsi_thresholds(lines):
    """Correct the opaque/thin threshold entries of a properties file, in place.

    Only the first 50 lines are inspected (fewer if the file is shorter) and
    the searched text must start within the first 30 columns of a line:

    * a line containing ``opaque=0.45`` becomes ``tsi.threshold.opaque=0.4``
    * a line containing ``thin=0.3`` becomes ``tsi.threshold.thin=0.05``

    Both replacement lines end with CRLF. Returns a list of
    ``(index, previous_line)`` tuples, one per replaced line.
    """
    changes = []
    # Bounded by the real length: indexing a fixed range(50) raised IndexError
    # on files shorter than 50 lines.
    for index in range(min(50, len(lines))):
        line = lines[index]
        # Slice ends keep the match start within columns 0..29.
        if 'opaque=0.45' in line[:40]:
            replacement = 'tsi.threshold.opaque=0.4\r\n'
        elif 'thin=0.3' in line[:37]:
            replacement = 'tsi.threshold.thin=0.05\r\n'
        else:
            continue
        changes.append((index, line))
        lines[index] = replacement
    return changes


def correct_tsi_files(directory):
    """Untar every archive in ``directory`` and fix the ``*.properties`` files.

    The process changes into ``directory``; archives are extracted there.
    """
    # Resolve before chdir so a relative path still matches in the globs below.
    directory = os.path.abspath(directory)
    os.chdir(directory)
    archives = glob.glob(os.path.join(directory, '*.tar'))
    # Count the archives themselves; the message used to report every file.
    print('Number of .tar files are: %d' % len(archives))
    for number, archive_path in enumerate(archives, 1):
        print('Untaring file:  %d' % number)
        # Close each archive right away (previously only the last handle was
        # closed, and none existed when there were no archives).
        # NOTE(review): extractall trusts member paths; archives are assumed
        # to come from the data archive itself.
        with tarfile.open(archive_path) as archive:
            archive.extractall(directory)
    for filename in glob.glob(os.path.join(directory, '*.properties')):
        print(filename)
        with open(filename, 'r') as handle:
            lines = handle.readlines()
        changes = fix_tsi_thresholds(lines)
        if not changes:
            continue
        # Write once per file, after all replacements.
        with open(filename, 'w') as handle:
            handle.writelines(lines)
        for index, previous in changes:
            print([previous])
            print(lines[index:index + 1])


if __name__ == '__main__':
    correct_tsi_files(
        raw_input('Enter the path: (example: /data/archive/ena/enatsiC1.00):'))
#Change the polarity of few values in Raw data files
#Instrument: IRT
import glob
import os
import tarfile
#import pandas
def negate_upwelling_fields(line, columns=(5, 8, 11, 14, 19), max_fields=33):
    """Flip the sign of selected numeric fields in one comma-separated record.

    Default columns, as labelled in the original script:
    5 up_short_hemisp, 8 up_short_hemisp_max, 11 up_short_hemisp_min,
    14 up_short_hemisp_std, 19 inst_up_short_hemisp_tp.

    Records with more than ``max_fields`` fields are returned unchanged, and
    so are records too short to contain every column (those used to raise
    IndexError). ``columns`` must not be empty. The line terminator is kept
    even when a negated column is the last field.
    """
    body = line.rstrip('\r\n')
    ending = line[len(body):]
    fields = body.split(',')
    if max(columns) < len(fields) <= max_fields:
        for column in columns:
            fields[column] = str(float(fields[column]) * -1)
    return ','.join(fields) + ending


def flip_polarity_files(directory):
    """Negate the upwelling fields in every ``*.icm`` file of ``directory``.

    The process changes into ``directory``.
    """
    # Resolve before chdir so a relative path still matches in the glob below.
    directory = os.path.abspath(directory)
    os.chdir(directory)
    file_count = len([name for name in os.listdir('.') if os.path.isfile(name)])
    print('Number of files are: %d' % file_count)
    # The untar step and a pandas experiment were disabled in the original
    # script; the *.icm files are expected to be unpacked already.
    for filename in glob.glob(os.path.join(directory, '*.icm')):
        print(filename)
        with open(filename, 'r') as handle:
            lines = handle.readlines()
        lines = [negate_upwelling_fields(line) for line in lines]
        with open(filename, 'w') as handle:
            handle.writelines(lines)


if __name__ == '__main__':
    flip_polarity_files(
        raw_input('Enter the path: (example: /data/archive/sgp/sgpirtE38.00):'))
#Rename the old raw files: replace "raw" and "bad" in each file name with "orig".
import os
def plan_renames(names, old, new):
    """Return ``(source, target)`` pairs for the names that need renaming.

    Hidden entries (names starting with ``.``) and names that do not contain
    ``old`` are skipped. Every occurrence of ``old`` in a name is replaced.
    """
    return [(name, name.replace(old, new))
            for name in names
            if not name.startswith('.') and old in name]


def replace_in_filenames(old, new, directory='.'):
    """Rename the entries of ``directory`` whose name contains ``old``."""
    for source, target in plan_renames(os.listdir(directory), old, new):
        os.rename(os.path.join(directory, source),
                  os.path.join(directory, target))


if __name__ == '__main__':
    path = raw_input('Enter the path: (example: /data/archive/twp/twpsondeC2.00):')
    os.chdir(path)
    # Two passes, as before: first 'raw' -> 'orig', then 'bad' -> 'orig' on
    # the refreshed directory listing.
    replace_in_filenames('raw', 'orig')
    replace_in_filenames('bad', 'orig')
#Change temperature from degC to degK
#Instrument: IRT
import glob
import os
import tarfile
def celsius_record_to_kelvin(line):
    """Add 273.15 to the temperature field(s) of one comma-separated record.

    * fewer than 5 fields: returned unchanged
    * exactly 5 fields: field 4 is converted and terminated with a newline
    * 6 or more fields: fields 4 and 5 are converted; field 5 is written with
      a leading space, and with a trailing newline when it is the last field
    """
    fields = line.split(',')
    count = len(fields)
    if count < 5:
        return line
    fields[4] = str(float(fields[4]) + 273.15)
    if count == 5:
        fields[4] += '\n'
    else:
        fields[5] = ' ' + str(float(fields[5]) + 273.15)
        if count == 6:
            fields[5] += '\n'
    return ','.join(fields)


def convert_irt_files_to_kelvin(directory):
    """Convert every ``*.icm`` file in ``directory`` (process chdirs there)."""
    # Resolve before chdir: joining a *relative* path onto the new working
    # directory made the glob below come back empty.
    directory = os.path.abspath(directory)
    os.chdir(directory)
    file_count = len([name for name in os.listdir('.') if os.path.isfile(name)])
    print('Number of files are: %d' % file_count)
    # The untar step was disabled in the original script; files are expected
    # to be unpacked already.
    for filename in glob.glob(os.path.join(directory, '*.icm')):
        print(filename)
        with open(filename, 'r') as handle:
            lines = handle.readlines()
        lines = [celsius_record_to_kelvin(line) for line in lines]
        with open(filename, 'w') as handle:
            handle.writelines(lines)


if __name__ == '__main__':
    convert_irt_files_to_kelvin(
        raw_input('Enter the path: (example: /data/archive/sgp/sgpirtE38.00):'))
#Temperature values were off by 10 K, so add 10 to the existing temperature records
#Instrument: RAD
import glob
import os
#import tarfile
def offset_rad_temperatures(line, offset=10, columns=(6, 12, 15),
                            missing_below=-1000):
    """Add ``offset`` to the temperature columns of one comma-separated record.

    A column is only changed when its value is greater than ``missing_below``;
    the original script used -1000 because some raw files carry -99999 or
    -69999 (probably missing-value markers). Records too short to contain
    every column are returned unchanged (they used to raise IndexError), and
    the line terminator is kept even when a changed column is the last field.
    ``columns`` must not be empty.
    """
    body = line.rstrip('\r\n')
    ending = line[len(body):]
    fields = body.split(',')
    if len(fields) <= max(columns):
        return line
    for column in columns:
        value = float(fields[column])
        if value > missing_below:
            fields[column] = str(value + offset)
    return ','.join(fields) + ending


def offset_rad_files(directory):
    """Apply the temperature offset to every ``*.dat`` file in ``directory``.

    The process changes into ``directory``.
    """
    # Resolve before chdir so a relative path still matches in the glob below.
    directory = os.path.abspath(directory)
    os.chdir(directory)
    file_count = len([name for name in os.listdir('.') if os.path.isfile(name)])
    print('Number of files are: %d' % file_count)
    # The untar step was disabled in the original script; files are expected
    # to be unpacked already.
    for filename in glob.glob(os.path.join(directory, '*.dat')):
        print(filename)
        with open(filename, 'r') as handle:
            lines = handle.readlines()
        lines = [offset_rad_temperatures(line) for line in lines]
        with open(filename, 'w') as handle:
            handle.writelines(lines)


if __name__ == '__main__':
    offset_rad_files(
        raw_input('Enter the path: (example: /data/archive/twp/twpgndradC2.00):'))
'''When the reprocessing start date is in middle of a day'''
import glob
import os
#import tarfile
# Raw file in which the reprocessing period starts part-way through, and the
# value of field 3 from which records in that file must be corrected.
MIDDAY_START_FILE = 'twpgndradC2.00.20070204.230000.raw.20070204230000.dat'
MIDDAY_START_TIME = 2317


def formula_temp(filename, data, explode, index=None, write=True):
    """Add 10 to the temperature columns (6, 12, 15) of one split record.

    ``explode`` is modified in place and the re-joined record is stored in
    ``data[index]``. Columns whose value is not greater than -1000 are left
    alone. A line terminator attached to a changed column is preserved.

    index -- position of the record in ``data``. When omitted, the
             module-level loop variable ``i`` is used, which is what the
             original three-argument call relied on.
    write -- when true (the default, matching the original behaviour) the
             whole of ``data`` is written back to ``filename`` immediately.
    """
    if index is None:
        index = i  # noqa: F821 -- legacy call style, see docstring
    for column in (6, 12, 15):
        raw = explode[column]
        value = float(raw)
        if value > -1000:
            text = raw.rstrip('\r\n')
            explode[column] = str(value + 10) + raw[len(text):]
    data[index] = ','.join(explode)
    if write:
        with open(filename, 'w') as handle:
            handle.writelines(data)


def needs_offset(filename, fields):
    """Tell whether the record ``fields`` read from ``filename`` is corrected.

    Every record qualifies, except in the file where reprocessing starts
    mid-way: there only records whose field 3 is at least
    ``MIDDAY_START_TIME`` do.
    """
    if MIDDAY_START_FILE in filename:
        return float(fields[3]) >= MIDDAY_START_TIME
    return True


def reprocess_rad_from_midday(directory):
    """Correct the ``*.dat`` files of ``directory`` (process chdirs there)."""
    # Resolve before chdir so a relative path still matches in the glob below.
    directory = os.path.abspath(directory)
    os.chdir(directory)
    file_count = len([name for name in os.listdir('.') if os.path.isfile(name)])
    print('Number of files are: %d' % file_count)
    # The untar step was disabled in the original script; files are expected
    # to be unpacked already.
    for filename in glob.glob(os.path.join(directory, '*.dat')):
        print(filename)
        with open(filename, 'r') as handle:
            records = handle.readlines()
        dirty = False
        for index, record in enumerate(records):
            fields = record.split(',')
            if len(fields) < 16:
                # Blank or short line: no temperature columns to correct.
                continue
            if needs_offset(filename, fields):
                formula_temp(filename, records, fields, index=index, write=False)
                dirty = True
        # One write per file instead of one full rewrite per record.
        if dirty:
            with open(filename, 'w') as handle:
                handle.writelines(records)


if __name__ == '__main__':
    reprocess_rad_from_midday(
        raw_input('Enter the path: (example: /data/archive/twp/twpgndradC2.00):'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment