Skip to content

Instantly share code, notes, and snippets.

@davidshumway
Last active May 21, 2022 03:36
Show Gist options
  • Save davidshumway/23c098cbd52bdf0a84c8530b3353f647 to your computer and use it in GitHub Desktop.
Save davidshumway/23c098cbd52bdf0a84c8530b3353f647 to your computer and use it in GitHub Desktop.
'''
After the script has finished, compress each generated .n3 file with gzip, e.g. `gzip -k -f buildingObs.n3` (-k keeps the original file, -f overwrites an existing .gz).
'''
import random
import time

import numpy as np
import pandas as pd
# Namespace declarations written at the top of every generated .n3 file.
# The rdf namespace IRI must end in '#', and `geo:` has to be declared because
# the FOI output uses geo:asWKT / geo:wktLiteral.
prefix = '''
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix sosa: <http://www.w3.org/ns/sosa/>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix geo: <http://www.opengis.net/ont/geosparql#>
prefix ex: <http://www.example3.com/>
'''
# ---------------------------------------------------------------------------
# Weather station observations
#
# Builds one row per (station, property, day), drops a small random fraction
# of rows, and writes the remainder to weatherObs.n3 as sosa:Observation
# resources.
# ---------------------------------------------------------------------------
n = time.time()  # "now" in epoch seconds; each day index steps back 24h from here
# 10 years
# 300 stations * 15 obs/day * 10 years * 365 days/year
size = 300*15*10*365
df_weather_obs = pd.DataFrame({
    # 300 stations. 0-299, then repeats.
    'station': np.arange(size) % 300,
    'result': np.random.randint(0, 10, size, dtype=np.uint32),
    # Every 300 rows, increment by 1 until 14. Then repeat.
    'property': (np.arange(size) // 300) % 15,
    # Each day contains 300 stations * 15 parameters per station per day.
    # Convert to seconds per day, then subtract from today.
    'time': n - ((np.arange(size) // (300*15)) * 24*60*60)
})
df_weather_obs['time'] = df_weather_obs['time'].astype('datetime64[s]')
df_weather_obs['year'] = df_weather_obs['time'].dt.year
df_weather_obs['month'] = df_weather_obs['time'].dt.month
df_weather_obs['day'] = df_weather_obs['time'].dt.day
# Keep the original row number as the observation id before sampling shuffles rows.
df_weather_obs['idx'] = df_weather_obs.index
# Assume that the stations occasionally don't report one or more parameters on some days.
df_weather_obs = df_weather_obs.sample(frac=9999/10000)
# Write to n3 file
fn = 'weatherObs.n3'
# Reorder cols so they line up with the placeholders in the template below.
df_weather_obs = df_weather_obs[['idx','station','year','month','day','property','result']]
# One placeholder per column. Month and day are zero-padded (%02d) because
# xsd:dateTime requires two-digit month and day fields.
weather_fmt = '''
ex:weatherObservation-%s a sosa:Observation ;
sosa:hasFeatureOfInterest ex:weatherStation-%s ;
sosa:resultTime "%d-%02d-%02dT00:00:00"^^xsd:dateTime ;
sosa:hasProperty "%s"^^xsd:string ;
sosa:hasSimpleResult "%s"^^xsd:double ;
ex:observationType "weather"^^xsd:string .
'''
with open(fn, 'w') as f:
    f.write(prefix)
    np.savetxt(f, df_weather_obs.values, fmt=weather_fmt)
# ---------------------------------------------------------------------------
# Building observations
#
# Builds one row per (building, day), keeps a random 50/365 fraction of the
# rows, and writes them to buildingObs.n3 as sosa:Observation resources.
# ---------------------------------------------------------------------------
n = time.time()  # "now" in epoch seconds; each day index steps back 24h from here
# 600 buildings * 1 obs/day * 365 days/year * 10 years
size = 600*1*365*10
df_building_obs = pd.DataFrame({
    # 600 buildings. 0-599, then repeats.
    'building': np.arange(size) % 600,
    # Every 600 rows move one day further into the past.
    'time': n - ((np.arange(size) // 600) * 24*60*60),
    'result': np.random.randint(0, 10, size, dtype=np.uint32)
})
df_building_obs['time'] = df_building_obs['time'].astype('datetime64[s]')
df_building_obs['year'] = df_building_obs['time'].dt.year
df_building_obs['month'] = df_building_obs['time'].dt.month
df_building_obs['day'] = df_building_obs['time'].dt.day
# Keep the original row number as the observation id before sampling shuffles rows.
df_building_obs['idx'] = df_building_obs.index
# 2190000 rows means every building has 1 obs per day but we only assume
# 50 obs per year per building.
df_building_obs = df_building_obs.sample(frac=50/365)
# Write to n3 file
fn = 'buildingObs.n3'
# Reorder cols so they line up with the placeholders in the template below.
df_building_obs = df_building_obs[['idx','building','year','month','day','result']]
# One placeholder per column. Month and day are zero-padded (%02d) because
# xsd:dateTime requires two-digit month and day fields.
building_fmt = '''
ex:buildingObservation-%s a sosa:Observation ;
sosa:hasFeatureOfInterest ex:building-%s ;
sosa:resultTime "%d-%02d-%02dT00:00:00"^^xsd:dateTime ;
sosa:hasSimpleResult "%s"^^xsd:double ;
ex:observationType "building"^^xsd:string .
'''
with open(fn, 'w') as f:
    f.write(prefix)
    np.savetxt(f, df_building_obs.values, fmt=building_fmt)
# ---------------------------------------------------------------------------
# Distance + FOI
#
# Generates random coordinates for 300 weather stations and 600 buildings,
# computes the distance in km for every station/building pair, and writes
# the features to foi.n3 and the pairwise distances to dist.n3.
# ---------------------------------------------------------------------------
# fois
size = 300
df1 = pd.DataFrame({
    'station': np.arange(size).astype(int),
    'lat': np.random.uniform(-90, 90, size),
    'lon': np.random.uniform(-180, 180, size),
})
size = 600
df2 = pd.DataFrame({
    'building': np.arange(size).astype(int),
    'lat': np.random.uniform(-90, 90, size),
    'lon': np.random.uniform(-180, 180, size),
})
# dist: one row per (station, building) pair; building varies fastest.
pair_idx = np.arange(600*300)
dist = pd.DataFrame({
    'building': pair_idx % 600,
    'station': pair_idx // 600,
})
from geopy import distance
# Pull the coordinates out once as (lat, lon) tuples so the 180,000 pair
# computations index plain lists instead of doing DataFrame lookups per row.
station_points = list(zip(df1['lat'], df1['lon']))
building_points = list(zip(df2['lat'], df2['lon']))
dist['km'] = [
    distance.distance(station_points[s], building_points[b]).km
    for s, b in zip(dist['station'], dist['building'])
]
dist['km'] = dist['km'].astype(int)  # truncate to whole kilometres
# Reorder cols to station, km, building, building, km, station so each row
# fills both directions of the distance template below.
dist = dist.iloc[:, [1, 2, 0, 0, 2, 1]]
# Write FOIs to file
fn = 'foi.n3'
# Columns are emitted in DataFrame order: id, lat, lon.
station_fmt = '''
ex:weatherStation-%s a sosa:Sensor ;
ex:geoType "weather"^^xsd:string ;
geo:asWKT "<http://www.opengis.net/def/crs/EPSG/0/4326> POINT(%s %s)"^^geo:wktLiteral .
'''
building_fmt = '''
ex:building-%s a sosa:Sensor ;
ex:geoType "building"^^xsd:string ;
geo:asWKT "<http://www.opengis.net/def/crs/EPSG/0/4326> POINT(%s %s)"^^geo:wktLiteral .
'''
with open(fn, 'w') as f:
    f.write(prefix)
    np.savetxt(f, df1.to_numpy(dtype=str), fmt=station_fmt)
    np.savetxt(f, df2.to_numpy(dtype=str), fmt=building_fmt)
# Write to distances to file
fn = 'dist.n3'
dist_fmt = '''
ex:weatherStation-%s ex:hasDistanceToBuilding [
ex:distance "%s"^^xsd:decimal ;
ex:building ex:building-%s ] .
ex:building-%s ex:hasDistanceToStation [
ex:distance "%s"^^xsd:decimal ;
ex:weatherStation ex:weatherStation-%s ] .
'''
with open(fn, 'w') as f:
    f.write(prefix)
    np.savetxt(f, dist.to_numpy(dtype=str), fmt=dist_fmt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment