Last active
May 5, 2021 16:00
-
-
Save thespacedoctor/36df5a1e7db5e0ab1f7a9245d8a81bcf to your computer and use it in GitHub Desktop.
[PS1 to Sherlock Database Importer] #sherlock #panstarrs #dr1 #import
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
*Ingest FITS data downloaded from MAST into Sherlock Database* | |
:Author: | |
David Young | |
:Date Created: | |
January 9, 2020 | |
Usage: | |
ps1_point_source_scores_to_sherlock <directory> | |
Options: | |
-h, --help show this help message | |
-v, --version show version | |
-s, --settings the settings file | |
directory path to directory containing the FITS binary tables to ingest | |
""" | |
################# GLOBAL IMPORTS #################### | |
import sys | |
import os | |
from fundamentals import tools | |
import numpy as np | |
from fundamentals.mysql import database | |
from astropy.io import fits | |
from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables | |
import statistics | |
from fundamentals.mysql import writequery | |
from HMpTy import HTM | |
def main(arguments=None):
    """
    *The main function used when ``ps1_point_source_scores_to_sherlock.py`` is run as a single script from the cl*

    Reads every FITS binary table found in the command-line ``<directory>``,
    converts each row into a dictionary (mapping the ``-999`` sentinel to
    NULL) and bulk-inserts the rows into the ``ps1_psc`` MySQL table,
    creating the table on demand. Successfully ingested files are moved into
    an ``ingested`` sub-directory so re-runs skip them.
    """
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()

    # UNPACK REMAINING CL ARGUMENTS INTO A DICT OF CLEANED VARIABLE NAMES
    a = {}
    for arg, val in list(arguments.items()):
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        a[varname] = val
        if arg == "--dbConn":
            dbConn = val
            a["dbConn"] = val
        log.debug('%s = %s' % (varname, val,))

    # GET LIST OF ALL FITS FILES TO INGEST
    directory = a["directory"]
    fitsFiles = []
    for d in os.listdir(directory):
        filepath = os.path.join(directory, d)
        # MATCH *.fits AND COMPRESSED VARIANTS (e.g. *.fits.gz)
        if os.path.isfile(filepath) and "fits" in d[-6:]:
            fitsFiles.append(filepath)

    # SETUP DATABASE CONNECTION - TRY THE UNIT-TEST DATABASE FIRST AND FALL
    # BACK TO THE REAL SHERLOCK DATABASE IF THE TEST DATABASE IS UNREACHABLE.
    # NOTE(review): credentials are hard-coded here; move them into a settings
    # file or mysql login-path so they are not committed to version control
    dbSettings = {
        'host': '127.0.0.1',
        'loginPath': 'unittesting',
        'user': 'utuser',
        'password': 'utpass',
        'db': 'unit_tests'
    }
    try:
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()
    except Exception:
        # SETUP ALL DATABASE CONNECTION - REAL SHERLOCK DB
        dbSettings = {
            'host': '127.0.0.1',
            'user': 'pessto',
            'password': 'p355t0',
            'db': 'crossmatch_catalogues'
        }
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()

    # CREATE THE TABLE IF DOES NOT YET EXIST
    sqlQuery = """CREATE TABLE IF NOT EXISTS `ps1_psc` (
  `objid` bigint(20) NOT NULL,
  `ps_score` double DEFAULT NULL,
  `qub_file_id` varchar(100) DEFAULT NULL,
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` tinyint(4) DEFAULT '0',
  PRIMARY KEY (`objid`),
  UNIQUE KEY `objid` (`objid`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
"""
    writequery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        Force=False,
    )

    # DISABLE KEYS TO SPEED UP THE BULK INSERTS.
    # NOTE(review): keys are never re-enabled by this script; run
    # `alter table ps1_psc enable keys;` once the full import completes
    sqlQuery = """alter table ps1_psc disable keys;"""
    writequery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
    )

    # ITERATE OVER ALL FILES
    count = len(fitsFiles)
    for ii, f in enumerate(fitsFiles):
        index = ii + 1
        filename = os.path.basename(f)
        if index > 1:
            # CURSOR UP ONE LINE AND CLEAR LINE (in-place progress display)
            sys.stdout.write("\x1b[1A\x1b[2K")
        percent = (float(index) / float(count)) * 100.
        print('%(index)s/%(count)s (%(percent)1.1f%% done): attempting to import %(filename)s into database' % locals())

        # GET FITS FILE ID FROM FILE NAME (filename without extension)
        fitsFileID = os.path.splitext(os.path.basename(f))[0]

        # READ ALL DATA INTO A LIST OF DICTIONARIES
        with fits.open(f) as hdul1:
            tableData = hdul1[1].data.tolist()
            cols = hdul1[1].columns.names
            # EVERY ROW IS TAGGED WITH THE SOURCE FILE ID
            cols.append("qub_file_id")
            # FILES PREFIXED 't' CARRY A CLASHING `objID` COLUMN - RENAME IT
            if os.path.basename(f)[0] == "t":
                cols2 = []
                cols2[:] = ["objID2" if c == "objID" else c for c in cols]
                cols = cols2
            # -999 IS THE MISSING-VALUE SENTINEL IN THESE TABLES - MAP TO NULL
            dictList = []
            dictList[:] = [dict(zip(cols, [None if l == -999 else l for l in (list(row) + [fitsFileID])]))
                           for row in tableData]

        # USE dbSettings TO ACTIVATE MULTIPROCESSING - INSERT LIST OF
        # DICTIONARIES INTO DATABASE
        insert_list_of_dictionaries_into_database_tables(
            dbConn=dbConn,
            log=log,
            dictList=dictList,
            dbTableName="ps1_psc",
            uniqueKeyList=["objid"],
            dateModified=False,
            dateCreated=False,
            batchSize=25000,
            replace=True,
            dbSettings=dbSettings
        )

        # MOVE THE INGESTED FILE ASIDE SO RERUNS SKIP IT (creating the
        # destination directory on first use)
        ingestedDir = os.path.join(directory, "ingested")
        if not os.path.exists(ingestedDir):
            os.makedirs(ingestedDir)
        os.rename(f, os.path.join(ingestedDir, os.path.basename(f)))

    return
# ENTRY POINT WHEN RUN DIRECTLY FROM THE COMMAND-LINE
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
*Ingest FITS data downloaded from MAST into Sherlock Database* | |
:Author: | |
David Young | |
:Date Created: | |
January 9, 2020 | |
Usage: | |
ps1_point_source_scores_to_sherlock <directory> | |
Options: | |
-h, --help show this help message | |
-v, --version show version | |
-s, --settings the settings file | |
directory path to directory containing the FITS binary tables to ingest | |
""" | |
################# GLOBAL IMPORTS #################### | |
import sys | |
import os | |
from fundamentals import tools | |
import numpy as np | |
from fundamentals.mysql import database | |
from astropy.io import fits | |
from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables | |
import statistics | |
from fundamentals.mysql import writequery | |
from HMpTy import HTM | |
def main(arguments=None):
    """
    *The main function used when ``ps1_point_source_scores_to_sherlock.py`` is run as a single script from the cl*

    Reads every FITS binary table found in the command-line ``<directory>``,
    converts each row into a dictionary (mapping the ``-999`` sentinel to
    NULL) and bulk-inserts the rows into the pre-existing
    ``tcs_cat_ps1_dr1`` MySQL table. Successfully ingested files are moved
    into an ``ingested`` sub-directory so re-runs skip them.
    """
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()

    # UNPACK REMAINING CL ARGUMENTS INTO A DICT OF CLEANED VARIABLE NAMES
    a = {}
    for arg, val in list(arguments.items()):
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        a[varname] = val
        if arg == "--dbConn":
            dbConn = val
            a["dbConn"] = val
        log.debug('%s = %s' % (varname, val,))

    # GET LIST OF ALL FITS FILES TO INGEST
    directory = a["directory"]
    fitsFiles = []
    for d in os.listdir(directory):
        filepath = os.path.join(directory, d)
        # MATCH *.fits AND COMPRESSED VARIANTS (e.g. *.fits.gz)
        if os.path.isfile(filepath) and "fits" in d[-6:]:
            fitsFiles.append(filepath)

    # SETUP DATABASE CONNECTION - TRY THE UNIT-TEST DATABASE FIRST AND FALL
    # BACK TO THE REAL SHERLOCK DATABASE IF THE TEST DATABASE IS UNREACHABLE.
    # NOTE(review): credentials are hard-coded here; move them into a settings
    # file or mysql login-path so they are not committed to version control
    dbSettings = {
        'host': '127.0.0.1',
        'loginPath': 'unittesting',
        'user': 'utuser',
        'password': 'utpass',
        'db': 'unit_tests'
    }
    try:
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()
    except Exception:
        # SETUP ALL DATABASE CONNECTION - REAL SHERLOCK DB
        dbSettings = {
            'host': '127.0.0.1',
            'user': 'pessto',
            'password': 'p355t0',
            'db': 'crossmatch_catalogues'
        }
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()

    # ITERATE OVER ALL FILES
    count = len(fitsFiles)
    for ii, f in enumerate(fitsFiles):
        index = ii + 1
        filename = os.path.basename(f)
        if index > 1:
            # CURSOR UP ONE LINE AND CLEAR LINE (in-place progress display)
            sys.stdout.write("\x1b[1A\x1b[2K")
        percent = (float(index) / float(count)) * 100.
        print('%(index)s/%(count)s (%(percent)1.1f%% done): attempting to import %(filename)s into database' % locals())

        # READ ALL DATA INTO A LIST OF DICTIONARIES, MAPPING THE -999
        # MISSING-VALUE SENTINEL TO NULL. UNLIKE THE ps1_psc VARIANT OF THIS
        # SCRIPT, THE FILE ID IS NOT APPENDED TO EACH ROW HERE.
        with fits.open(f) as hdul1:
            tableData = hdul1[1].data.tolist()
            cols = hdul1[1].columns.names
            dictList = []
            dictList[:] = [dict(zip(cols, [None if l == -999 else l for l in list(row)]))
                           for row in tableData]

        # USE dbSettings TO ACTIVATE MULTIPROCESSING - INSERT LIST OF
        # DICTIONARIES INTO DATABASE
        insert_list_of_dictionaries_into_database_tables(
            dbConn=dbConn,
            log=log,
            dictList=dictList,
            dbTableName="tcs_cat_ps1_dr1",
            uniqueKeyList=["objid"],
            dateModified=False,
            dateCreated=False,
            batchSize=25000,
            replace=True,
            dbSettings=dbSettings
        )

        # MOVE THE INGESTED FILE ASIDE SO RERUNS SKIP IT (creating the
        # destination directory on first use)
        ingestedDir = os.path.join(directory, "ingested")
        if not os.path.exists(ingestedDir):
            os.makedirs(ingestedDir)
        os.rename(f, os.path.join(ingestedDir, os.path.basename(f)))

    return
# ENTRY POINT WHEN RUN DIRECTLY FROM THE COMMAND-LINE
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
*Ingest FITS data downloaded from MAST into Sherlock Database* | |
:Author: | |
David Young | |
:Date Created: | |
January 9, 2020 | |
Usage: | |
ps1_to_sherlock <directory> | |
Options: | |
-h, --help show this help message | |
-v, --version show version | |
-s, --settings the settings file | |
directory path to directory containing the FITS binary tables to ingest | |
""" | |
################# GLOBAL IMPORTS #################### | |
import sys | |
import os | |
from fundamentals import tools | |
import numpy as np | |
from fundamentals.mysql import database | |
from astropy.io import fits | |
from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables | |
import statistics | |
from fundamentals.mysql import writequery | |
from HMpTy import HTM | |
def main(arguments=None):
    """
    *The main function used when ``ps1_to_sherlock.py`` is run as a single script from the cl*

    Reads every FITS binary table found in the command-line ``<directory>``,
    converts each row into a dictionary (mapping the ``-999`` sentinel to
    NULL, tagging each row with its source-file ID), computes mean RA/Dec
    and HTM level 10/13/16 IDs per object, then bulk-inserts the rows into
    the ``tcs_cat_ps1_dr1`` MySQL table (created on demand). Successfully
    ingested files are moved into an ``ingested`` sub-directory.
    """
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()

    # UNPACK REMAINING CL ARGUMENTS INTO A DICT OF CLEANED VARIABLE NAMES
    a = {}
    for arg, val in list(arguments.items()):
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        a[varname] = val
        if arg == "--dbConn":
            dbConn = val
            a["dbConn"] = val
        log.debug('%s = %s' % (varname, val,))

    # GET LIST OF ALL FITS FILES TO INGEST (sorted for deterministic order)
    directory = a["directory"]
    fitsFiles = []
    for d in os.listdir(directory):
        filepath = os.path.join(directory, d)
        # MATCH *.fits AND COMPRESSED VARIANTS (e.g. *.fits.gz)
        if os.path.isfile(filepath) and "fits" in d[-6:]:
            fitsFiles.append(filepath)
    fitsFiles.sort()

    # SETUP DATABASE CONNECTION - TRY THE UNIT-TEST DATABASE FIRST AND FALL
    # BACK TO THE REAL SHERLOCK DATABASE IF THE TEST DATABASE IS UNREACHABLE.
    # NOTE(review): credentials are hard-coded here; move them into a settings
    # file or mysql login-path so they are not committed to version control
    dbSettings = {
        'host': '127.0.0.1',
        'loginPath': 'unittesting',
        'user': 'utuser',
        'password': 'utpass',
        'db': 'unit_tests'
    }
    try:
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()
    except Exception:
        # SETUP ALL DATABASE CONNECTION - REAL SHERLOCK DB
        dbSettings = {
            'host': '127.0.0.1',
            'user': 'pessto',
            'password': 'p355t0',
            'db': 'crossmatch_catalogues'
        }
        dbConn = database(
            log=log,
            dbSettings=dbSettings
        ).connect()

    # CREATE THE TABLE IF DOES NOT YET EXIST
    sqlQuery = """CREATE TABLE IF NOT EXISTS `tcs_cat_ps1_dr1` (
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` tinyint(4) DEFAULT '0',
  `gExtNSigma` double DEFAULT NULL,
  `gKronRad` double DEFAULT NULL,
  `gmomentR1` double DEFAULT NULL,
  `gmomentRH` double DEFAULT NULL,
  `gpsfLikelihood` double DEFAULT NULL,
  `iExtNSigma` double DEFAULT NULL,
  `iKronRad` double DEFAULT NULL,
  `imomentR1` double DEFAULT NULL,
  `imomentRH` double DEFAULT NULL,
  `ipsfLikelihood` double DEFAULT NULL,
  `objID` bigint(20) NOT NULL,
  `rExtNSigma` double DEFAULT NULL,
  `rKronRad` double DEFAULT NULL,
  `rmomentR1` double DEFAULT NULL,
  `rmomentRH` double DEFAULT NULL,
  `rpsfLikelihood` double DEFAULT NULL,
  `uniquePspsSTid` bigint(20) NOT NULL,
  `yExtNSigma` double DEFAULT NULL,
  `yKronRad` double DEFAULT NULL,
  `ymomentR1` double DEFAULT NULL,
  `ymomentRH` double DEFAULT NULL,
  `ypsfLikelihood` double DEFAULT NULL,
  `zExtNSigma` double DEFAULT NULL,
  `zKronRad` double DEFAULT NULL,
  `zmomentR1` double DEFAULT NULL,
  `zmomentRH` double DEFAULT NULL,
  `zpsfLikelihood` double DEFAULT NULL,
  `bestDetection` tinyint(4) DEFAULT NULL,
  `gApMag` double DEFAULT NULL,
  `gApMagErr` double DEFAULT NULL,
  `gKronMag` double DEFAULT NULL,
  `gKronMagErr` double DEFAULT NULL,
  `gPSFMag` double DEFAULT NULL,
  `gPSFMagErr` double DEFAULT NULL,
  `gdec` double DEFAULT NULL,
  `ginfoFlag` int(11) DEFAULT NULL,
  `ginfoFlag2` int(11) DEFAULT NULL,
  `ginfoFlag3` int(11) DEFAULT NULL,
  `gra` double DEFAULT NULL,
  `iApMag` double DEFAULT NULL,
  `iApMagErr` double DEFAULT NULL,
  `iKronMag` double DEFAULT NULL,
  `iKronMagErr` double DEFAULT NULL,
  `iPSFMag` double DEFAULT NULL,
  `iPSFMagErr` double DEFAULT NULL,
  `idec` double DEFAULT NULL,
  `iinfoFlag` int(11) DEFAULT NULL,
  `iinfoFlag2` tinyint(4) DEFAULT NULL,
  `iinfoFlag3` int(11) DEFAULT NULL,
  `ira` double DEFAULT NULL,
  `primaryDetection` tinyint(4) DEFAULT NULL,
  `rApMag` double DEFAULT NULL,
  `rApMagErr` double DEFAULT NULL,
  `rKronMag` double DEFAULT NULL,
  `rKronMagErr` double DEFAULT NULL,
  `rPSFMag` double DEFAULT NULL,
  `rPSFMagErr` double DEFAULT NULL,
  `rdec` double DEFAULT NULL,
  `rinfoFlag` int(11) DEFAULT NULL,
  `rinfoFlag2` int(11) DEFAULT NULL,
  `rinfoFlag3` int(11) DEFAULT NULL,
  `rra` double DEFAULT NULL,
  `yApMag` double DEFAULT NULL,
  `yApMagErr` double DEFAULT NULL,
  `yKronMag` double DEFAULT NULL,
  `yKronMagErr` double DEFAULT NULL,
  `yPSFMag` double DEFAULT NULL,
  `yPSFMagErr` double DEFAULT NULL,
  `ydec` double DEFAULT NULL,
  `yinfoFlag` int(11) DEFAULT NULL,
  `yinfoFlag2` int(11) DEFAULT NULL,
  `yinfoFlag3` int(11) DEFAULT NULL,
  `yra` double DEFAULT NULL,
  `zApMag` double DEFAULT NULL,
  `zApMagErr` double DEFAULT NULL,
  `zKronMag` double DEFAULT NULL,
  `zKronMagErr` double DEFAULT NULL,
  `zPSFMag` double DEFAULT NULL,
  `zPSFMagErr` double DEFAULT NULL,
  `zdec` double DEFAULT NULL,
  `zinfoFlag` int(11) DEFAULT NULL,
  `zinfoFlag2` int(11) DEFAULT NULL,
  `zinfoFlag3` int(11) DEFAULT NULL,
  `zra` double DEFAULT NULL,
  `qub_file_id` varchar(100) DEFAULT NULL,
  `decAve` double DEFAULT NULL,
  `raAve` double DEFAULT NULL,
  `htm16ID` bigint(20) DEFAULT NULL,
  `htm13ID` int(11) DEFAULT NULL,
  `htm10ID` int(11) DEFAULT NULL,
  `ps_score` double DEFAULT NULL,
  `nn_score` double DEFAULT NULL,
  PRIMARY KEY (`objID`),
  KEY `idx_htm10ID` (`htm10ID`),
  KEY `idx_htm13ID` (`htm13ID`),
  KEY `idx_htm16ID` (`htm16ID`),
  KEY `idx_gmag` (`gPSFMag`),
  KEY `idx_rmag` (`rPSFMag`),
  KEY `idx_imag` (`iPSFMag`),
  KEY `idx_zmag` (`zPSFMag`),
  KEY `idx_ymag` (`yPSFMag`),
  KEY `idx_ps_score` (`ps_score`),
  KEY `idx_nn_score` (`nn_score`),
  KEY `idx_uniquePspsSTid` (`uniquePspsSTid`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
"""
    writequery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        Force=False,
    )

    # SETUP HTM MESHES AT THE THREE DEPTHS STORED IN THE TABLE (hoisted out
    # of the per-file loop so each mesh is built only once)
    mesh10 = HTM(
        depth=10,
        log=log
    )
    mesh13 = HTM(
        depth=13,
        log=log
    )
    mesh16 = HTM(
        depth=16,
        log=log
    )

    # ITERATE OVER ALL FILES
    count = len(fitsFiles)
    for ii, f in enumerate(fitsFiles):
        index = ii + 1
        filename = os.path.basename(f)
        if index > 1:
            # CURSOR UP ONE LINE AND CLEAR LINE (in-place progress display)
            sys.stdout.write("\x1b[1A\x1b[2K")
        percent = (float(index) / float(count)) * 100.
        print('%(index)s/%(count)s (%(percent)1.1f%% done): attempting to import %(filename)s into database' % locals())

        # GET FITS FILE ID FROM FILE NAME (extension removed; the first
        # character of the filename is also stripped - presumably a type
        # prefix; TODO confirm against the file-naming convention)
        fitsFileID = os.path.splitext(os.path.basename(f))[0][1:]

        # READ ALL DATA INTO A LIST OF DICTIONARIES, TAGGING EACH ROW WITH
        # THE SOURCE FILE ID AND MAPPING THE -999 SENTINEL TO NULL
        with fits.open(f) as hdul1:
            tableData = hdul1[1].data.tolist()
            cols = hdul1[1].columns.names
            cols.append("qub_file_id")
            dictList = []
            dictList[:] = [dict(zip(cols, [None if l == -999 else l for l in (list(row) + [fitsFileID])]))
                           for row in tableData]

        # NOTE(review): a disabled filter on `iinfoFlag2 > 255` used to live
        # here; it built a filtered list that was never assigned back to
        # dictList, so the dead computation has been removed

        # GENERATE MEAN RA/DEC AND HTM IDs IF POSITIONAL COLUMNS ARE PRESENT
        filts = ["g", "r", "i", "z", "y"]
        raList = []
        decList = []
        if "ira" in cols:
            for d in dictList:
                ras = []
                decs = []
                for fil in filts:
                    # skip filters with no (or zero/NULL) position
                    if d[fil + "ra"]:
                        ras.append(d[fil + "ra"])
                        decs.append(d[fil + "dec"])
                oneRa = statistics.mean(ras)
                oneDec = statistics.mean(decs)
                d["raAve"] = oneRa
                d["decAve"] = oneDec
                raList.append(oneRa)
                decList.append(oneDec)
        # NOTE(review): the zip below assumes every dict contributed exactly
        # one (ra, dec) pair; an object with no valid ra would raise in
        # statistics.mean() above before any misalignment could occur
        htmid10 = mesh10.lookup_id(raList, decList)
        htmid13 = mesh13.lookup_id(raList, decList)
        htmid16 = mesh16.lookup_id(raList, decList)
        for h10, h13, h16, d in zip(htmid10, htmid13, htmid16, dictList):
            d["htm10ID"] = h10
            d["htm13ID"] = h13
            d["htm16ID"] = h16

        # USE dbSettings TO ACTIVATE MULTIPROCESSING - INSERT LIST OF
        # DICTIONARIES INTO DATABASE
        insert_list_of_dictionaries_into_database_tables(
            dbConn=dbConn,
            log=log,
            dictList=dictList,
            dbTableName="tcs_cat_ps1_dr1",
            uniqueKeyList=["objID"],
            dateModified=False,
            dateCreated=False,
            batchSize=25000,
            replace=True,
            dbSettings=dbSettings
        )

        # MOVE THE INGESTED FILE ASIDE SO RERUNS SKIP IT (creating the
        # destination directory on first use)
        ingestedDir = os.path.join(directory, "ingested")
        if not os.path.exists(ingestedDir):
            os.makedirs(ingestedDir)
        os.rename(f, os.path.join(ingestedDir, os.path.basename(f)))

    return
# ENTRY POINT WHEN RUN DIRECTLY FROM THE COMMAND-LINE
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment