[PS1 to Sherlock Database Importer] #sherlock #panstarrs #dr1 #import
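Three related ingest scripts: the first imports the Pan-STARRS1 point-source-classification scores into a `ps1_psc` table, the second is a variant that writes the scores straight into `tcs_cat_ps1_dr1`, and the third ingests the PS1 DR1 detection tables themselves, adding mean positions and HTM spatial indexes.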
#!/usr/bin/env python
# encoding: utf-8
"""
*Ingest FITS data downloaded from MAST into Sherlock Database*
:Author:
David Young
:Date Created:
January 9, 2020
Usage:
ps1_point_source_scores_to_sherlock <directory>
Options:
-h, --help show this help message
-v, --version show version
-s, --settings the settings file
directory path to directory containing the FITS binary tables to ingest
"""
################# GLOBAL IMPORTS ####################
import sys
import os
from fundamentals import tools
from fundamentals.mysql import database
from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
from fundamentals.mysql import writequery
from astropy.io import fits

def main(arguments=None):
"""
*The main function used when ``ps1_point_source_scores_to_sherlock.py`` is run as a single script from the cl*
"""
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()
    # UNPACK THE REMAINING COMMAND-LINE ARGUMENTS INTO A DICTIONARY OF
    # VARIABLES
    a = {}
    for arg, val in list(arguments.items()):
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        a[varname] = val
        if arg == "--dbConn":
            dbConn = val
            a["dbConn"] = val
        log.debug('%s = %s' % (varname, val,))
    # GET LIST OF ALL FITS FILES TO INGEST
    directory = a["directory"]
    fitsFiles = []
    for d in os.listdir(directory):
        filepath = os.path.join(directory, d)
        if os.path.isfile(filepath) and d.endswith(".fits"):
            fitsFiles.append(filepath)
    # SETUP THE DATABASE CONNECTION - TRY THE UNIT-TEST SETUP FIRST
    dbSettings = {
        'host': '127.0.0.1',
        'loginPath': 'unittesting',
        'user': 'utuser',
        'password': 'utpass',
        'db': 'unit_tests'
    }
    try:
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()
    except Exception:
        # FALL BACK TO THE REAL SHERLOCK DB
        dbSettings = {
            'host': '127.0.0.1',
            'user': 'pessto',
            'password': 'p355t0',
            'db': 'crossmatch_catalogues'
        }
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()
    # CREATE THE TABLE IF IT DOES NOT YET EXIST
    sqlQuery = """CREATE TABLE IF NOT EXISTS `ps1_psc` (
`objid` bigint(20) NOT NULL,
`ps_score` double DEFAULT NULL,
`qub_file_id` varchar(100) DEFAULT NULL,
`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
`dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
`updated` tinyint(4) DEFAULT '0',
PRIMARY KEY (`objid`),
UNIQUE KEY `objid` (`objid`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
"""
writequery(
log=log,
sqlQuery=sqlQuery,
dbConn=dbConn,
Force=False,
)
    # DISABLE KEYS FOR FASTER BULK INSERTS
    sqlQuery = """alter table ps1_psc disable keys;"""
    writequery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
    )
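    # NOTE: THE KEYS ARE NEVER RE-ENABLED BY THIS SCRIPT; RUN
    # `alter table ps1_psc enable keys;` MANUALLY ONCE THE INGEST COMPLETES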
    # ITERATE OVER ALL FILES
    count = len(fitsFiles)
    for ii, f in enumerate(fitsFiles):
        index = ii + 1
        filename = os.path.basename(f)
        if index > 1:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        percent = (float(index) / float(count)) * 100.
        print('%(index)s/%(count)s (%(percent)1.1f%% done): attempting to import %(filename)s into database' % locals())

        # GET FITS FILE ID FROM FILE NAME
        fitsFileID = os.path.splitext(os.path.basename(f))[0]
        # READ ALL DATA INTO A LIST OF DICTIONARIES
        with fits.open(f) as hdul1:
            tableData = hdul1[1].data.tolist()
            cols = hdul1[1].columns.names
        cols.append("qub_file_id")

        # FILES PREFIXED `t` HAVE THEIR objID COLUMN RENAMED TO objID2
        if os.path.basename(f)[0] == "t":
            cols2 = []
            cols2[:] = ["objID2" if c == "objID" else c for c in cols]
            cols = cols2
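        # MAP EACH ROW TO A COLUMN:VALUE DICT, TREATING -999 AS A NULL
        # SENTINEL AND TAGGING EVERY ROW WITH THE SOURCE FILE ID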
        dictList = []
        dictList[:] = [dict(zip(cols, [None if l == -999 else l for l in (list(row) + [fitsFileID])]))
                       for row in tableData]
        # USE dbSettings TO ACTIVATE MULTIPROCESSING - INSERT LIST OF
        # DICTIONARIES INTO DATABASE
        insert_list_of_dictionaries_into_database_tables(
            dbConn=dbConn,
            log=log,
            dictList=dictList,
            dbTableName="ps1_psc",
            uniqueKeyList=["objid"],
            dateModified=False,
            dateCreated=False,
            batchSize=25000,
            replace=True,
            dbSettings=dbSettings
        )
        # MOVE THE INGESTED FILE INTO AN `ingested` SUB-DIRECTORY (CREATED IF
        # MISSING)
        ingestedDir = os.path.join(directory, "ingested")
        if not os.path.exists(ingestedDir):
            os.makedirs(ingestedDir)
        os.rename(f, os.path.join(ingestedDir, os.path.basename(f)))

    return

if __name__ == '__main__':
    main()
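The second script is a stripped-down variant of the first: it assumes its target table already exists and replaces rows directly in `tcs_cat_ps1_dr1`, without tagging them with a `qub_file_id`.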
#!/usr/bin/env python
# encoding: utf-8
"""
*Ingest FITS data downloaded from MAST into Sherlock Database*
:Author:
David Young
:Date Created:
January 9, 2020
Usage:
ps1_point_source_scores_to_sherlock <directory>
Options:
-h, --help show this help message
-v, --version show version
-s, --settings the settings file
directory path to directory containing the FITS binary tables to ingest
"""
################# GLOBAL IMPORTS ####################
import sys
import os
from fundamentals import tools
from fundamentals.mysql import database
from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
from astropy.io import fits

def main(arguments=None):
"""
*The main function used when ``ps1_point_source_scores_to_sherlock.py`` is run as a single script from the cl*
"""
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()
    # UNPACK THE REMAINING COMMAND-LINE ARGUMENTS INTO A DICTIONARY OF
    # VARIABLES
    a = {}
    for arg, val in list(arguments.items()):
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        a[varname] = val
        if arg == "--dbConn":
            dbConn = val
            a["dbConn"] = val
        log.debug('%s = %s' % (varname, val,))
    # GET LIST OF ALL FITS FILES TO INGEST
    directory = a["directory"]
    fitsFiles = []
    for d in os.listdir(directory):
        filepath = os.path.join(directory, d)
        if os.path.isfile(filepath) and d.endswith(".fits"):
            fitsFiles.append(filepath)
    # SETUP THE DATABASE CONNECTION - TRY THE UNIT-TEST SETUP FIRST
    dbSettings = {
        'host': '127.0.0.1',
        'loginPath': 'unittesting',
        'user': 'utuser',
        'password': 'utpass',
        'db': 'unit_tests'
    }
    try:
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()
    except Exception:
        # FALL BACK TO THE REAL SHERLOCK DB
        dbSettings = {
            'host': '127.0.0.1',
            'user': 'pessto',
            'password': 'p355t0',
            'db': 'crossmatch_catalogues'
        }
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()
    # ITERATE OVER ALL FILES
    count = len(fitsFiles)
    for ii, f in enumerate(fitsFiles):
        index = ii + 1
        filename = os.path.basename(f)
        if index > 1:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        percent = (float(index) / float(count)) * 100.
        print('%(index)s/%(count)s (%(percent)1.1f%% done): attempting to import %(filename)s into database' % locals())

        # GET FITS FILE ID FROM FILE NAME (NOT USED IN THIS VARIANT)
        fitsFileID = os.path.splitext(os.path.basename(f))[0]
        # READ ALL DATA INTO A LIST OF DICTIONARIES
        with fits.open(f) as hdul1:
            tableData = hdul1[1].data.tolist()
            cols = hdul1[1].columns.names
        # cols.append("qub_file_id")
        # if os.path.basename(f)[0] == "t":
        #     cols2 = []
        #     cols2[:] = ["objID2" if c == "objID" else c for c in cols]
        #     cols = cols2
        dictList = []
        dictList[:] = [dict(zip(cols, [None if l == -999 else l for l in list(row)]))
                       for row in tableData]
        # USE dbSettings TO ACTIVATE MULTIPROCESSING - INSERT LIST OF
        # DICTIONARIES INTO DATABASE
        insert_list_of_dictionaries_into_database_tables(
            dbConn=dbConn,
            log=log,
            dictList=dictList,
            dbTableName="tcs_cat_ps1_dr1",
            uniqueKeyList=["objid"],
            dateModified=False,
            dateCreated=False,
            batchSize=25000,
            replace=True,
            dbSettings=dbSettings
        )
        # MOVE THE INGESTED FILE INTO AN `ingested` SUB-DIRECTORY (CREATED IF
        # MISSING)
        ingestedDir = os.path.join(directory, "ingested")
        if not os.path.exists(ingestedDir):
            os.makedirs(ingestedDir)
        os.rename(f, os.path.join(ingestedDir, os.path.basename(f)))

    return

if __name__ == '__main__':
    main()
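The third script ingests the PS1 DR1 detection tables themselves: it creates the `tcs_cat_ps1_dr1` table if needed, averages the per-filter positions into `raAve`/`decAve` and stamps each row with HTM level 10/13/16 IDs before inserting.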
#!/usr/bin/env python
# encoding: utf-8
"""
*Ingest FITS data downloaded from MAST into Sherlock Database*
:Author:
David Young
:Date Created:
January 9, 2020
Usage:
ps1_to_sherlock <directory>
Options:
-h, --help show this help message
-v, --version show version
-s, --settings the settings file
directory path to directory containing the FITS binary tables to ingest
"""
################# GLOBAL IMPORTS ####################
import sys
import os
import statistics
from fundamentals import tools
from fundamentals.mysql import database
from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
from fundamentals.mysql import writequery
from astropy.io import fits
from HMpTy import HTM

def main(arguments=None):
"""
*The main function used when ``ps1_to_sherlock.py`` is run as a single script from the cl*
"""
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()
    # UNPACK THE REMAINING COMMAND-LINE ARGUMENTS INTO A DICTIONARY OF
    # VARIABLES
    a = {}
    for arg, val in list(arguments.items()):
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        a[varname] = val
        if arg == "--dbConn":
            dbConn = val
            a["dbConn"] = val
        log.debug('%s = %s' % (varname, val,))
    # GET LIST OF ALL FITS FILES TO INGEST
    directory = a["directory"]
    fitsFiles = []
    for d in os.listdir(directory):
        filepath = os.path.join(directory, d)
        if os.path.isfile(filepath) and d.endswith(".fits"):
            fitsFiles.append(filepath)
    fitsFiles.sort()
    # SETUP THE DATABASE CONNECTION - TRY THE UNIT-TEST SETUP FIRST
    dbSettings = {
        'host': '127.0.0.1',
        'loginPath': 'unittesting',
        'user': 'utuser',
        'password': 'utpass',
        'db': 'unit_tests'
    }
    try:
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()
    except Exception:
        # FALL BACK TO THE REAL SHERLOCK DB
        dbSettings = {
            'host': '127.0.0.1',
            'user': 'pessto',
            'password': 'p355t0',
            'db': 'crossmatch_catalogues'
        }
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()
    # CREATE THE TABLE IF IT DOES NOT YET EXIST
    sqlQuery = """CREATE TABLE IF NOT EXISTS `tcs_cat_ps1_dr1` (
`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
`dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
`updated` tinyint(4) DEFAULT '0',
`gExtNSigma` double DEFAULT NULL,
`gKronRad` double DEFAULT NULL,
`gmomentR1` double DEFAULT NULL,
`gmomentRH` double DEFAULT NULL,
`gpsfLikelihood` double DEFAULT NULL,
`iExtNSigma` double DEFAULT NULL,
`iKronRad` double DEFAULT NULL,
`imomentR1` double DEFAULT NULL,
`imomentRH` double DEFAULT NULL,
`ipsfLikelihood` double DEFAULT NULL,
`objID` bigint(20) NOT NULL,
`rExtNSigma` double DEFAULT NULL,
`rKronRad` double DEFAULT NULL,
`rmomentR1` double DEFAULT NULL,
`rmomentRH` double DEFAULT NULL,
`rpsfLikelihood` double DEFAULT NULL,
`uniquePspsSTid` bigint(20) NOT NULL,
`yExtNSigma` double DEFAULT NULL,
`yKronRad` double DEFAULT NULL,
`ymomentR1` double DEFAULT NULL,
`ymomentRH` double DEFAULT NULL,
`ypsfLikelihood` double DEFAULT NULL,
`zExtNSigma` double DEFAULT NULL,
`zKronRad` double DEFAULT NULL,
`zmomentR1` double DEFAULT NULL,
`zmomentRH` double DEFAULT NULL,
`zpsfLikelihood` double DEFAULT NULL,
`bestDetection` tinyint(4) DEFAULT NULL,
`gApMag` double DEFAULT NULL,
`gApMagErr` double DEFAULT NULL,
`gKronMag` double DEFAULT NULL,
`gKronMagErr` double DEFAULT NULL,
`gPSFMag` double DEFAULT NULL,
`gPSFMagErr` double DEFAULT NULL,
`gdec` double DEFAULT NULL,
`ginfoFlag` int(11) DEFAULT NULL,
`ginfoFlag2` int(11) DEFAULT NULL,
`ginfoFlag3` int(11) DEFAULT NULL,
`gra` double DEFAULT NULL,
`iApMag` double DEFAULT NULL,
`iApMagErr` double DEFAULT NULL,
`iKronMag` double DEFAULT NULL,
`iKronMagErr` double DEFAULT NULL,
`iPSFMag` double DEFAULT NULL,
`iPSFMagErr` double DEFAULT NULL,
`idec` double DEFAULT NULL,
`iinfoFlag` int(11) DEFAULT NULL,
`iinfoFlag2` tinyint(4) DEFAULT NULL,
`iinfoFlag3` int(11) DEFAULT NULL,
`ira` double DEFAULT NULL,
`primaryDetection` tinyint(4) DEFAULT NULL,
`rApMag` double DEFAULT NULL,
`rApMagErr` double DEFAULT NULL,
`rKronMag` double DEFAULT NULL,
`rKronMagErr` double DEFAULT NULL,
`rPSFMag` double DEFAULT NULL,
`rPSFMagErr` double DEFAULT NULL,
`rdec` double DEFAULT NULL,
`rinfoFlag` int(11) DEFAULT NULL,
`rinfoFlag2` int(11) DEFAULT NULL,
`rinfoFlag3` int(11) DEFAULT NULL,
`rra` double DEFAULT NULL,
`yApMag` double DEFAULT NULL,
`yApMagErr` double DEFAULT NULL,
`yKronMag` double DEFAULT NULL,
`yKronMagErr` double DEFAULT NULL,
`yPSFMag` double DEFAULT NULL,
`yPSFMagErr` double DEFAULT NULL,
`ydec` double DEFAULT NULL,
`yinfoFlag` int(11) DEFAULT NULL,
`yinfoFlag2` int(11) DEFAULT NULL,
`yinfoFlag3` int(11) DEFAULT NULL,
`yra` double DEFAULT NULL,
`zApMag` double DEFAULT NULL,
`zApMagErr` double DEFAULT NULL,
`zKronMag` double DEFAULT NULL,
`zKronMagErr` double DEFAULT NULL,
`zPSFMag` double DEFAULT NULL,
`zPSFMagErr` double DEFAULT NULL,
`zdec` double DEFAULT NULL,
`zinfoFlag` int(11) DEFAULT NULL,
`zinfoFlag2` int(11) DEFAULT NULL,
`zinfoFlag3` int(11) DEFAULT NULL,
`zra` double DEFAULT NULL,
`qub_file_id` varchar(100) DEFAULT NULL,
`decAve` double DEFAULT NULL,
`raAve` double DEFAULT NULL,
`htm16ID` bigint(20) DEFAULT NULL,
`htm13ID` int(11) DEFAULT NULL,
`htm10ID` int(11) DEFAULT NULL,
`ps_score` double DEFAULT NULL,
`nn_score` double DEFAULT NULL,
PRIMARY KEY (`objID`),
KEY `idx_htm10ID` (`htm10ID`),
KEY `idx_htm13ID` (`htm13ID`),
KEY `idx_htm16ID` (`htm16ID`),
KEY `idx_gmag` (`gPSFMag`),
KEY `idx_rmag` (`rPSFMag`),
KEY `idx_imag` (`iPSFMag`),
KEY `idx_zmag` (`zPSFMag`),
KEY `idx_ymag` (`yPSFMag`),
KEY `idx_ps_score` (`ps_score`),
KEY `idx_nn_score` (`nn_score`),
KEY `idx_uniquePspsSTid` (`uniquePspsSTid`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
"""
writequery(
log=log,
sqlQuery=sqlQuery,
dbConn=dbConn,
Force=False,
)
    # DISABLE KEYS (CURRENTLY COMMENTED OUT)
    # sqlQuery = """alter table tcs_cat_ps1_dr1 disable keys;"""
    # writequery(
    #     log=log,
    #     sqlQuery=sqlQuery,
    #     dbConn=dbConn,
    # )
    # SETUP HTM MESHES
    mesh10 = HTM(
        depth=10,
        log=log
    )
    mesh13 = HTM(
        depth=13,
        log=log
    )
    mesh16 = HTM(
        depth=16,
        log=log
    )
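    # (THE DEPTH 10, 13 AND 16 MESHES GENERATE THE HIERARCHICAL TRIANGULAR
    # MESH IDs STORED IN THE htm10ID/htm13ID/htm16ID COLUMNS FOR SPATIAL
    # INDEXING)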
    # ITERATE OVER ALL FILES
    count = len(fitsFiles)
    for ii, f in enumerate(fitsFiles):
        index = ii + 1
        filename = os.path.basename(f)
        if index > 1:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        percent = (float(index) / float(count)) * 100.
        print('%(index)s/%(count)s (%(percent)1.1f%% done): attempting to import %(filename)s into database' % locals())

        # GET FITS FILE ID FROM FILE NAME (DROPPING THE LEADING CHARACTER)
        fitsFileID = os.path.splitext(os.path.basename(f))[0][1:]
        # READ ALL DATA INTO A LIST OF DICTIONARIES
        with fits.open(f) as hdul1:
            tableData = hdul1[1].data.tolist()
            cols = hdul1[1].columns.names
        cols.append("qub_file_id")
        # if os.path.basename(f)[0] == "t":
        #     cols2 = []
        #     cols2[:] = ["objID2" if c == "objID" else c for c in cols]
        #     cols = cols2
        dictList = []
        dictList[:] = [dict(zip(cols, [None if l == -999 else l for l in (list(row) + [fitsFileID])]))
                       for row in tableData]
        # FILTER FOR ROWS WITH iinfoFlag2 > 255 (FILTER CURRENTLY DISABLED -
        # newList IS NEVER ASSIGNED BACK TO dictList)
        newList = []
        newList[:] = [l for l in dictList if (
            l["iinfoFlag2"] and int(l["iinfoFlag2"]) > 255)]
        # dictList = newList
        # for d in dictList:
        #     d["iinfoFlag2"] = None
        # GENERATE MEAN POSITIONS AND HTMIDs IF NEEDED
        # (NOTE: A PLAIN MEAN OF THE PER-FILTER RAs IGNORES WRAP-AROUND AT
        # RA = 0/360 DEG)
        filts = ["g", "r", "i", "z", "y"]
        raList = []
        decList = []
        if "ira" in cols:
            for d in dictList:
                ras = []
                decs = []
                for fil in filts:
                    if d[fil + "ra"]:
                        ras.append(d[fil + "ra"])
                        decs.append(d[fil + "dec"])
                oneRa = statistics.mean(ras)
                oneDec = statistics.mean(decs)
                d["raAve"] = oneRa
                d["decAve"] = oneDec
                raList.append(oneRa)
                decList.append(oneDec)
            htmid10 = mesh10.lookup_id(raList, decList)
            htmid13 = mesh13.lookup_id(raList, decList)
            htmid16 = mesh16.lookup_id(raList, decList)
            for h10, h13, h16, d in zip(htmid10, htmid13, htmid16, dictList):
                d["htm10ID"] = h10
                d["htm13ID"] = h13
                d["htm16ID"] = h16
        # USE dbSettings TO ACTIVATE MULTIPROCESSING - INSERT LIST OF
        # DICTIONARIES INTO DATABASE
        insert_list_of_dictionaries_into_database_tables(
            dbConn=dbConn,
            log=log,
            dictList=dictList,
            dbTableName="tcs_cat_ps1_dr1",
            uniqueKeyList=["objID"],
            dateModified=False,
            dateCreated=False,
            batchSize=25000,
            replace=True,
            dbSettings=dbSettings
        )
        # MOVE THE INGESTED FILE INTO AN `ingested` SUB-DIRECTORY (CREATED IF
        # MISSING)
        ingestedDir = os.path.join(directory, "ingested")
        if not os.path.exists(ingestedDir):
            os.makedirs(ingestedDir)
        os.rename(f, os.path.join(ingestedDir, os.path.basename(f)))

    return

if __name__ == '__main__':
    main()
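For reference, a minimal sketch of how the ingested scores could be read back for a quick post-ingest sanity check. This is not part of the original gist: it assumes the `readquery` helper is available in your installed version of the `fundamentals` package and reuses the `ps1_psc` table name and credentials from the scripts above; adjust both to your own setup.

#!/usr/bin/env python
# encoding: utf-8
# A MINIMAL POST-INGEST SANITY CHECK (NOT PART OF THE ORIGINAL GIST).
# ASSUMPTIONS: `fundamentals.mysql.readquery` exists in the installed version
# of `fundamentals`, and the `ps1_psc` table was populated by the first script
# above using the same database settings.
import logging
from fundamentals.mysql import database, readquery

log = logging.getLogger(__name__)
dbSettings = {
    'host': '127.0.0.1',
    'user': 'pessto',
    'password': 'p355t0',
    'db': 'crossmatch_catalogues'
}
dbConn = database(log=log, dbSettings=dbSettings).connect()

# PULL BACK A HANDFUL OF ROWS; A ps_score CLOSE TO 1 INDICATES A POINT-LIKE
# SOURCE IN THE PS1 POINT-SOURCE CATALOGUE
rows = readquery(
    log=log,
    sqlQuery="select objid, ps_score from ps1_psc limit 5",
    dbConn=dbConn
)
for row in rows:
    print(row["objid"], row["ps_score"])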