@LukePrior · Last active April 23, 2022 07:05
This script calculates updated launch site information for SondeHub using historical data from the last year.
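The script depends on requests, numpy, and boto3. Flight summaries are read anonymously from the public sondehub-history S3 bucket (hence the unsigned boto3 client below), so no AWS credentials are required.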
import requests
import json
import math
from datetime import datetime, timedelta
import numpy as np
import boto3
from botocore import UNSIGNED
from botocore.config import Config
launch_site_url = 'https://api.v2.sondehub.org/sites'
binned_data_bucket = 'sondehub-history'
# Lookup of sonde type names to the numeric rs_types codes used by the sites API
rs_types_lookup = {
    "LMS6-403": "11",
    "RS92": "14",  # NOTE: duplicate key, overridden by "RS92": "80" below
    "DFM-09": "17",
    "DFM-06": "18",
    "MRZ-N1": "19",
    "RS-11G": "22",
    "RS41": "41",
    "iMet-4": "34",
    "iMS-100": "35",
    "RS92-NGP": "52",
    "DFM-17": "54",
    "MRZ-3MK": "62",
    "M20": "63",
    "M10": "77",
    "LMS6-1680": "82",
    "iMet-54": "84",
    "iMet-1": "07",
    "PAZA-12M": "15",
    "PAZA-22": "16",
    "MK3": "20",
    "1524LA LORAN-C/GL5000": "21",
    "SRS-C34": "26",
    "AVK-MRZ": "27",
    "AVK–AK2-02": "28",
    "MARZ2-2": "29",
    "RS2-80": "30",
    "GTS1-2/GFE(L)": "33",
    "CF-06": "45",
    "AVK-BAR": "58",
    "M2K2-R": "59",
    "AVK-RZM-2": "68",
    "MARL-A/Vektor-M-RZM-2": "69",
    "MARL-A": "73",
    "RS90": "78",
    "RS92": "80",
    "MARL-A/Vektor-M-MRZ": "88",
    "MARL-A/Vektor-M-BAR": "89",
    "iMet-2": "99"
}
# Configure S3 bucket
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
paginator = s3.get_paginator('list_objects_v2')
now = datetime.now()
def round_time(dt=None, roundTo=60):
    """Round a datetime object to any time lapse in seconds.

    dt : datetime.datetime object, default now.
    roundTo : closest number of seconds to round to, default 1 minute.
    Author: Thierry Husson 2012 - Use it as you want but don't blame me.
    """
    if dt is None:
        dt = datetime.now()
    seconds = (dt.replace(tzinfo=None) - dt.min).seconds
    rounding = (seconds + roundTo / 2) // roundTo * roundTo
    return dt + timedelta(0, rounding - seconds, -dt.microsecond)
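# For example (comment only), rounding to the 3-hour slots used later for
# launch time estimation:
#   round_time(datetime(2022, 1, 1, 11, 22, 33), 60 * 60 * 3)
#   -> datetime(2022, 1, 1, 12, 0)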
# Get atmospheric density at altitude using the US Standard Atmosphere layers
def get_density(altitude):
    # Constants
    airMolWeight = 28.9644  # Molecular weight of air [g/mol]
    densitySL = 1.225  # Density at sea level [kg/m3]
    pressureSL = 101325  # Pressure at sea level [Pa]
    temperatureSL = 288.15  # Temperature at sea level [K]
    gamma = 1.4
    gravity = 9.80665  # Acceleration of gravity [m/s2]
    tempGrad = -0.0065  # Temperature gradient [K/m]
    RGas = 8.31432  # Universal gas constant [J/mol/K]
    R = 287.053  # Specific gas constant of air [J/kg/K]
    deltaTemperature = 0.0

    # Lookup tables: layer base altitude [m], relative pressure at the base,
    # base temperature [K], and lapse rate [K/km] for each layer
    altitudes = [0, 11000, 20000, 32000, 47000, 51000, 71000, 84852]
    pressureRels = [
        1,
        2.23361105092158e-1,
        5.403295010784876e-2,
        8.566678359291667e-3,
        1.0945601337771144e-3,
        6.606353132858367e-4,
        3.904683373343926e-5,
        3.6850095235747942e-6,
    ]
    temperatures = [288.15, 216.65, 216.65, 228.65, 270.65, 270.65, 214.65, 186.946]
    tempGrads = [-6.5, 0, 1, 2.8, 0, -2.8, -2, 0]
    gMR = gravity * airMolWeight / RGas

    # Pick the layer containing this altitude
    i = 0
    if altitude > 0:
        while altitude > altitudes[i + 1]:
            i = i + 1

    # Lookup based on layer
    baseTemp = temperatures[i]
    tempGrad = tempGrads[i] / 1000.0
    pressureRelBase = pressureRels[i]
    deltaAltitude = altitude - altitudes[i]
    temperature = baseTemp + tempGrad * deltaAltitude

    # Calculate relative pressure (isothermal vs. gradient layer)
    if math.fabs(tempGrad) < 1e-10:
        pressureRel = pressureRelBase * math.exp(
            -1 * gMR * deltaAltitude / 1000.0 / baseTemp
        )
    else:
        pressureRel = pressureRelBase * math.pow(
            baseTemp / temperature, gMR / tempGrad / 1000.0
        )

    # Add temperature offset
    temperature = temperature + deltaTemperature

    # Finally, work out the density from the ideal gas relation
    speedOfSound = math.sqrt(gamma * R * temperature)  # (unused)
    pressure = pressureRel * pressureSL  # (unused)
    density = densitySL * pressureRel * temperatureSL / temperature
    return density
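# Sanity checks against the standard atmosphere (comment only):
#   get_density(0)     -> 1.225 kg/m3 (sea level)
#   get_density(11000) -> ~0.364 kg/m3 (tropopause)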
# Get sea level descent rate
def get_descent_rate(descent_rate, altitude):
    rho = get_density(altitude)
    return math.sqrt((rho / 1.225) * math.pow(descent_rate, 2))
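# At terminal velocity drag balances weight, so descent speed scales as
# 1/sqrt(air density); multiplying by sqrt(rho / rho_sea_level) converts a
# rate measured at altitude to its sea level equivalent. Illustrative
# example (comment only, values from the standard atmosphere model above):
#   get_descent_rate(10, 5000) -> ~7.8 m/s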
# Get flight date from an object key of the form
# launchsites/{site}/{year}/{month}/{day}/{serial}.json
def check_flight_age(key):
    date = key.split('/', 1)[1]
    date = date.split('/', 1)[1]
    date = date.rsplit('/', 1)[0]
    date = datetime.strptime(date, '%Y/%m/%d')
    return date
# Get flight serial from object key
def get_flight_serial(key):
    serial = key.rsplit('/', 1)[1]
    serial = serial.split('.json', 1)[0]
    return serial
# Download flight summary given its object key
def get_flight_data(key):
    data = s3.get_object(Bucket=binned_data_bucket, Key=key)
    return json.loads(data['Body'].read().decode('utf-8'))
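# Each summary file appears to hold three frames - [first, highest, last] -
# shaped roughly like this (hypothetical values for illustration):
#   [{"serial": "S1234567", "alt": 112.0, "vel_v": 4.9, "frequency": 404.3,
#     "type": "RS41", "time_received": "2022-01-01T11:31:00.000000Z"}, ...]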
# Get burst altitude from data; reject bursts below 10 km or frames where
# the middle entry is not the highest point of the flight
def check_flight_burst(data):
    initial = data[0]['alt']
    burst = data[1]['alt']
    final = data[2]['alt']
    if burst < 10000:
        return None
    if burst < initial or burst < final:
        return None
    return burst
# Get transmit frequency from data; require it to be stable within 0.002 MHz
# (2 kHz) across all frames
def check_flight_frequency(data):
    frequency = data[0]['frequency']
    for entry in data:
        if abs(entry['frequency'] - frequency) > 0.002:
            return None
    return round(frequency, 2)
# Get sonde type from data; all frames must agree
def check_flight_type(data):
    sonde_type = data[0]['type']
    for entry in data:
        if entry['type'] != sonde_type:
            return None
    return sonde_type
# Get mean ascent rate from first frame to burst [m/s]
def check_flight_ascent(data):
    initial = data[0]
    burst = data[1]
    height = burst['alt'] - initial['alt']
    if height < 1000:
        return None
    initial_time = initial['time_received']
    initial_date = datetime.strptime(initial_time, '%Y-%m-%dT%H:%M:%S.%fZ')
    burst_time = burst['time_received']
    burst_date = datetime.strptime(burst_time, '%Y-%m-%dT%H:%M:%S.%fZ')
    difference = burst_date - initial_date
    # total_seconds() rather than .seconds, which discards whole days
    if difference.total_seconds() < 60:
        return None
    ascent_rate = height / difference.total_seconds()
    return ascent_rate
# Get sea-level-equivalent descent rate from data [m/s]
def check_flight_descent(data):
    burst = data[1]
    final = data[2]
    if final['alt'] > burst['alt']:
        return None
    if 'vel_v' not in final:
        return None
    if final['vel_v'] > 0:
        return None
    if final['alt'] > 12000:
        return None
    # vel_v is negative on descent; squaring in get_descent_rate drops the sign
    descent = get_descent_rate(final['vel_v'], final['alt'])
    return descent
# Get estimated launch time by extrapolating the ascent back to the ground
def check_flight_launch(data):
    initial = data[0]
    burst = data[1]
    height = burst['alt'] - initial['alt']
    if height < 1000:
        return None
    initial_time = initial['time_received']
    initial_date = datetime.strptime(initial_time, '%Y-%m-%dT%H:%M:%S.%fZ')
    burst_time = burst['time_received']
    burst_date = datetime.strptime(burst_time, '%Y-%m-%dT%H:%M:%S.%fZ')
    difference = burst_date - initial_date
    if difference.total_seconds() < 60:
        return None
    ascent_rate = height / difference.total_seconds()
    ascent_time = initial['alt'] / ascent_rate
    launch_date = initial_date - timedelta(seconds=ascent_time)
    # Snap to the nearest 3-hour synoptic slot (00z, 03z, 06z, ...)
    launch_date = round_time(launch_date, 60 * 60 * 3)
    return launch_date
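# Worked example (comment only, illustrative numbers): a first frame at
# 1,500 m with a 5 m/s ascent rate puts the launch 1500 / 5 = 300 s before
# the first frame; that estimate is then snapped to a 3-hour slot.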
# Check for regular launch time patterns given an array of datetimes
def check_launch_times(times):
    launches = []
    first = times[-1]  # times are ordered newest-first, so the last entry is the oldest
    last = times[0]
    days = (last - first).days
    hours = [0, 3, 6, 9, 12, 15, 18, 21]
    for hour in hours:
        current = first.replace(hour=hour)
        count = 0
        while last > current:
            if current in times:
                count += 1
            current = current + timedelta(days=1)
        # Accept the slot if launches hit this hour on more than 80% of days
        if count > days * 0.8:
            launches.append("0:{0:02d}:00".format(hour))
    return launches
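# For example (comment only): a site launching at 00z and 12z on more than
# 80% of days would return ['0:00:00', '0:12:00'].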
# Convert string fields to floats (helper, not currently called below)
def clean_data(data):
    for point in data:
        if 'alt' in point:
            point['alt'] = float(point['alt'])
        if 'vel_v' in point:
            point['vel_v'] = float(point['vel_v'])
    return data
# Remove outliers from a numpy array using the median absolute deviation
def reject_outliers(data, m=2.):
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    # When mdev is 0, keep everything; the original scalar 0. broke the
    # boolean indexing below
    s = d / mdev if mdev else np.zeros(len(d))
    return data[s < m]
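# Example (comment only): the single bad sample is dropped, the rest kept:
#   reject_outliers(np.array([10., 11., 9., 10., 50.])) -> [10., 11., 9., 10.]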
# Get sites
launch_site_request = requests.get(launch_site_url)
launch_sites = launch_site_request.json()
# Get binned flight prefixes, one per launch site
binned_data = s3.list_objects_v2(
    Bucket=binned_data_bucket,
    Prefix='launchsites/',
    Delimiter='/',
    MaxKeys=1000
)
binned_data = binned_data['CommonPrefixes']
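# With Delimiter='/', S3 returns one CommonPrefixes entry per launch site
# folder, e.g. {'Prefix': 'launchsites/94672/'} (hypothetical site ID),
# which is stripped down to the bare site ID below.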
# Generate list of sites we have data for
eligible_launch_sites = []
for site in binned_data:
    site_id = site['Prefix']
    site_id = site_id.replace('launchsites/', '')
    site_id = site_id.replace('/', '')
    eligible_launch_sites.append(site_id)
# Start processing
print('Fetching {} launch sites!'.format(len(eligible_launch_sites)))
# Get the list of flights from the last year for each site
eligible_site_data = {}
i = 0
for site in eligible_launch_sites:
    i += 1
    print('{}/{} - Fetching launch site #{}'.format(i, len(eligible_launch_sites), site))
    eligible_site_data[site] = {}
    eligible_site_data[site]['launches'] = []
    pages = paginator.paginate(Bucket=binned_data_bucket, Prefix='launchsites/{}/'.format(site))
    for page in pages:
        for obj in page['Contents']:
            date = check_flight_age(obj['Key'])
            difference = now - date
            days = difference.days
            if days > 365:
                continue
            serial = get_flight_serial(obj['Key'])
            # Newest flights end up at the front of the list
            eligible_site_data[site]['launches'].insert(0, {'key': obj['Key'], 'date': date})
# Check if there is enough data for each site, then summarise its flights
i = 0
for site in eligible_site_data:
    i += 1
    entries = len(eligible_site_data[site]['launches'])
    if entries < 30:
        print('{}/{} - Skipping launch site #{}'.format(i, len(eligible_launch_sites), site))
        continue
    print('{}/{} - Processing launch site #{}'.format(i, len(eligible_launch_sites), site))
    eligible_site_data[site]['summary'] = {}
    eligible_site_data[site]['summary']['types'] = {}
    temp_serials = []
    # Iterate over a copy so duplicate flights can be removed from the original list
    for flight in list(eligible_site_data[site]['launches']):
        try:
            data = get_flight_data(flight['key'])
            if data[0]['serial'] in temp_serials:
                eligible_site_data[site]['launches'].remove(flight)
                continue
            temp_serials.append(data[0]['serial'])
            flight['burst'] = check_flight_burst(data)
            flight['frequency'] = check_flight_frequency(data)
            flight['type'] = check_flight_type(data)
            difference = now - flight['date']
            days = difference.days
            flight['ascent_rate'] = check_flight_ascent(data)
            flight['descent_rate'] = check_flight_descent(data)
            flight['launch'] = check_flight_launch(data)
            # Only flights from the last 90 days count towards type/frequency/time stats
            if flight['type'] is not None and days <= 90:
                types = eligible_site_data[site]['summary']['types']
                if flight['type'] in types:
                    types[flight['type']]['count'] = types[flight['type']]['count'] + 1
                else:
                    types[flight['type']] = {}
                    types[flight['type']]['count'] = 1
                if flight['frequency'] is not None:
                    if 'frequencies' not in types[flight['type']]:
                        types[flight['type']]['frequencies'] = {}
                    if flight['frequency'] in types[flight['type']]['frequencies']:
                        types[flight['type']]['frequencies'][flight['frequency']] = types[flight['type']]['frequencies'][flight['frequency']] + 1
                    else:
                        types[flight['type']]['frequencies'][flight['frequency']] = 1
                if flight['launch'] is not None:
                    if 'times' not in types[flight['type']]:
                        types[flight['type']]['times'] = []
                    types[flight['type']]['times'].append(flight['launch'])
        except Exception:
            # Skip flights with missing or malformed summary data
            pass
    # Find the most common sonde type for this site
    temp_count = -1
    for sonde_type in eligible_site_data[site]['summary']['types']:
        if eligible_site_data[site]['summary']['types'][sonde_type]['count'] > temp_count:
            eligible_site_data[site]['summary']['common_type'] = sonde_type
            temp_count = eligible_site_data[site]['summary']['types'][sonde_type]['count']
    bursts = np.empty((0))
    ascents = np.empty((0))
    descents = np.empty((0))
    # Collect burst/ascent/descent samples from the most common type only
    for flight in eligible_site_data[site]['launches']:
        try:
            if flight['type'] != eligible_site_data[site]['summary']['common_type']:
                continue
            if flight['burst'] is not None:
                bursts = np.append(bursts, flight['burst'])
            if flight['ascent_rate'] is not None:
                ascents = np.append(ascents, flight['ascent_rate'])
            if flight['descent_rate'] is not None:
                descents = np.append(descents, flight['descent_rate'])
        except Exception:
            pass
    # Build rs_types: [code, frequency] when one frequency dominates, else just the code
    eligible_site_data[site]['summary']['rs_types'] = []
    for sonde_type in eligible_site_data[site]['summary']['types']:
        type_summary = eligible_site_data[site]['summary']['types'][sonde_type]
        if 'frequencies' in type_summary:
            common_frequency = max(type_summary['frequencies'], key=type_summary['frequencies'].get)
            # Only keep the frequency if over 90% of this type's flights used it
            if type_summary['frequencies'][common_frequency] > type_summary['count'] * 0.9:
                type_summary['frequency'] = common_frequency
            type_summary.pop('frequencies', None)
        if 'times' in type_summary:
            launch_times = check_launch_times(type_summary['times'])
            if len(launch_times) > 0:
                eligible_site_data[site]['summary']['times'] = launch_times
            type_summary.pop('times', None)
        if 'frequency' in type_summary:
            temp = [rs_types_lookup[sonde_type], str(type_summary['frequency'])]
            eligible_site_data[site]['summary']['rs_types'].append(temp)
        else:
            eligible_site_data[site]['summary']['rs_types'].append(rs_types_lookup[sonde_type])
    eligible_site_data[site]['summary'].pop('types', None)
    # Reject outliers, then publish stats only when enough samples remain
    bursts = reject_outliers(bursts)
    ascents = reject_outliers(ascents)
    descents = reject_outliers(descents)
    if 'common_type' in eligible_site_data[site]['summary']:
        eligible_site_data[site]['summary'].pop('common_type', None)
    if np.count_nonzero(bursts) > 30:
        eligible_site_data[site]['summary']['burst_altitude'] = round(np.mean(bursts) / 100) * 100
        eligible_site_data[site]['summary']['burst_std'] = round(np.std(bursts) / 10) * 10
        eligible_site_data[site]['summary']['burst_samples'] = np.count_nonzero(bursts)
    if np.count_nonzero(ascents) > 30:
        eligible_site_data[site]['summary']['ascent_rate'] = round(np.mean(ascents), 1)
        eligible_site_data[site]['summary']['ascent_std'] = round(np.std(ascents), 2)
        eligible_site_data[site]['summary']['ascent_samples'] = np.count_nonzero(ascents)
    if np.count_nonzero(descents) > 30:
        eligible_site_data[site]['summary']['descent_rate'] = round(np.mean(descents), 1)
        eligible_site_data[site]['summary']['descent_std'] = round(np.std(descents), 2)
        eligible_site_data[site]['summary']['descent_samples'] = np.count_nonzero(descents)
    # Print the existing record, then the merged one, for comparison
    print(json.dumps(launch_sites[site]))
    updated_site = launch_sites[site]
    summary = eligible_site_data[site]['summary']
    # The stats above are only present when enough samples survived, so guard
    # with 'in' checks (the original equality tests could raise KeyError).
    # Adopt the new stats when the record has none yet, the new sample count
    # is comparable, or the new standard deviation is lower.
    if summary['rs_types']:
        updated_site['rs_types'] = summary['rs_types']
    if 'ascent_rate' in summary:
        if ('ascent_rate' not in updated_site or
                'ascent_samples' not in updated_site or
                'ascent_std' not in updated_site or
                summary['ascent_samples'] >= (updated_site['ascent_samples'] - 10) or
                summary['ascent_std'] < updated_site['ascent_std']):
            updated_site['ascent_rate'] = summary['ascent_rate']
            updated_site['ascent_std'] = summary['ascent_std']
            updated_site['ascent_samples'] = summary['ascent_samples']
    if 'burst_altitude' in summary:
        if ('burst_altitude' not in updated_site or
                'burst_samples' not in updated_site or
                'burst_std' not in updated_site or
                summary['burst_samples'] >= (updated_site['burst_samples'] - 10) or
                summary['burst_std'] < updated_site['burst_std']):
            updated_site['burst_altitude'] = summary['burst_altitude']
            updated_site['burst_std'] = summary['burst_std']
            updated_site['burst_samples'] = summary['burst_samples']
    if 'descent_rate' in summary:
        if ('descent_rate' not in updated_site or
                'descent_samples' not in updated_site or
                'descent_std' not in updated_site or
                summary['descent_samples'] >= (updated_site['descent_samples'] - 10) or
                summary['descent_std'] < updated_site['descent_std']):
            updated_site['descent_rate'] = summary['descent_rate']
            updated_site['descent_std'] = summary['descent_std']
            updated_site['descent_samples'] = summary['descent_samples']
    print(json.dumps(updated_site))