Last active
June 26, 2024 08:58
-
-
Save spirrobe/7fef86500469b73715bb87245010af4a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Wed Sep 15 09:22:30 2021 | |
| @author: spirrobe | |
| """ | |
import datetime
import json
import os
from urllib.parse import urlsplit

import requests
class chm15ksession(requests.Session):
    """
    A session for downloading data files from a CHM-15k ceilometer.

    This class inherits from requests.Session and talks to the CGI endpoint
    ``cgi-bin/chm-cgi`` of the device to validate a password, list the
    available files and download netCDF and zipped netCDF files.

    Parameters
    ----------
    url : str
        The URL of the CHM-15k ceilometer. Can be a local ip or an http URL.
        ``http://`` is prepended if the value does not start with ``http``.
    password : str, optional
        The password for accessing the CHM-15k.
        Default is "15k-Nimbus".
    outpath : str, optional
        The path to save downloaded files to. Default is the current directory.
    download2subdirs : bool, optional
        Whether to put files into a subdirectory of outpath named
        Y{year}/M{month}/D{day} (Y{year}/M{month} for the monthly zip files),
        where year, month, day are inferred from the filename.
        Default is False.
    timeout : int or float, optional
        The timeout in seconds for the get calls, adjust if on a low
        bandwidth/slow network. Default is 20.
    quiet : bool, optional
        If True, progress messages are suppressed. Default is True.

    Attributes
    ----------
    url : str
        The normalised URL of the CHM-15k (with scheme and trailing slash).
    password : str
        The password for accessing the CHM-15k data server.
    outpath : str
        The path to save downloaded files to (with trailing separator).
    download2subdirs : bool
        Whether files are sorted into date subdirectories.
    timeout : int or float
        The timeout in seconds passed to every request.
    filecount : int or None
        The number of files reported by the server, None before the first
        successful call to getfilelist.
    quiet : bool
        Whether progress messages are suppressed.
    sessionid : str or None
        The session token parsed from the validation reply, None as long
        as no connection has been established.
    zipfiles : list of str
        The names of the zipped netCDF files available on the server.
    zipsizes : list of int
        The sizes of the zipped netCDF files as reported by the server.
    ncfiles : list of str
        The names of the netCDF files available on the server.
    ncsizes : list of int
        The sizes of the netCDF files as reported by the server.

    Methods
    -------
    connect()
        Validates the password and stores the session token as a cookie.
    getfilelist()
        Returns a dictionary of available netCDF and zipped netCDF files on
        the CHM-15k data server.
    getsinglefile(filename, overwrite=True)
        Downloads one netCDF or zip file by name.
    getncfiles(overwrite=False)
        Downloads all available netCDF files from the CHM-15k to the
        local file system.
    getzipfiles(overwrite=False)
        Downloads all available zipped netCDF files from the CHM-15k
        to the local file system.
    """

    # outcomes reported by _fetch to the public download methods
    _DOWNLOADED = 'downloaded'
    _EXISTS = 'exists'
    _FAILED = 'failed'

    def __init__(self,
                 url,
                 password="15k-Nimbus",
                 outpath='./',
                 download2subdirs=False,
                 timeout=20,
                 quiet=True,
                 *args, **kwargs):
        """
        Initialize a new instance of the chm15ksession class.

        Parameters
        ----------
        url : str
            The URL of the CHM-15k.
        password : str, optional
            The password for accessing the CHM-15k data server. Default is
            "15k-Nimbus".
        outpath : str, optional
            The path to save downloaded files to.
            Default is the current directory.
        download2subdirs : bool, optional
            Whether to sort files into date subdirectories of outpath.
            Default is False.
        timeout : int or float, optional
            The timeout in seconds for the get calls, adjust if on a
            low bandwidth/slow network. Default is 20.
        quiet : bool, optional
            If True, progress messages are suppressed. Default is True.
        *args, **kwargs
            Passed on to requests.Session.
        """
        super().__init__(*args, **kwargs)

        self.timeout = timeout

        # normalise the address so that the cgi path can simply be appended
        self.url = url
        if not self.url.endswith('/'):
            self.url += '/'
        if not self.url.startswith('http'):
            self.url = 'http://' + self.url

        self.__cgi = "cgi-bin/chm-cgi"
        self.__cgiurl = self.url + self.__cgi

        self.password = password

        self.outpath = outpath
        if not self.outpath.endswith(os.sep):
            self.outpath += os.sep
        self.download2subdirs = download2subdirs

        self.filecount = None
        self.sessionid = None
        self.zipfiles = []
        self.zipsizes = []
        self.ncfiles = []
        self.ncsizes = []
        self.quiet = quiet

    @staticmethod
    def _now():
        """Return the current timezone-aware UTC time for log messages."""
        # datetime.timezone.utc is what datetime.UTC aliases, but it also
        # exists on Python versions older than 3.11
        return datetime.datetime.now(datetime.timezone.utc)

    def _filename2date(self, filename):
        """
        Split the leading date stamp of a filename into its parts.

        Parameters
        ----------
        filename : str
            File name (or path) whose part before the first underscore is
            a YYYYMMDD or YYYYMM stamp.

        Returns
        -------
        tuple of str
            (year, month, day) for an 8 digit stamp, (year, month) for a
            6 digit stamp and ('', '', '') if no date could be inferred.
        """
        stamp = filename.split(os.sep)[-1].split('_')[0]
        if stamp.isdigit():
            if len(stamp) == 8:
                # typical netcdf files
                return stamp[:4], stamp[4:6], stamp[6:8]
            if len(stamp) == 6:
                # zipfiles do not have a day as they are for the month
                return stamp[:4], stamp[4:6]
        print(f'Date could not be inferred from {filename}')
        return '', '', ''

    def _filename2datefolder(self, filename):
        """
        Build the relative date folder for a filename.

        Returns
        -------
        str
            'Y{year}/M{month}/D{day}/' (without the day part for monthly
            files, using the platform separator), or '' if the filename
            carries no date.
        """
        date = self._filename2date(filename)
        if not date[0]:
            return ''
        # zip stops at the shorter sequence, so monthly stamps get no D part
        folder = os.sep.join(prefix + part
                             for prefix, part in zip('YMD', date)) + os.sep
        # outpath may have been changed by the user after construction
        if not self.outpath.endswith(os.sep):
            folder = os.sep + folder
        return folder

    def _fetch(self, filename, filesize, overwrite):
        """
        Download one file into outpath unless an up to date copy exists.

        Parameters
        ----------
        filename : str
            Name of the file on the device.
        filesize : int
            Size of the file as reported in the file list.
        overwrite : bool
            If False, a local file of the same size is not downloaded again.

        Returns
        -------
        str
            One of _DOWNLOADED, _EXISTS (skipped) or _FAILED (the server
            did not answer with status 200).
        """
        targetdir = self.outpath
        if self.download2subdirs:
            targetdir += self._filename2datefolder(filename)
        target = targetdir + filename

        # the local size is compared in units of 1024 bytes
        # NOTE(review): presumably the device reports sizes in KiB - confirm
        if (not overwrite
                and os.path.exists(target)
                and os.path.getsize(target) // 1024 == filesize):
            return self._EXISTS

        resp = self.get(self.__cgiurl + '/' + filename + "?getfile",
                        timeout=self.timeout)
        try:
            # check if the transfer worked in the first place
            if resp.status_code != 200:
                return self._FAILED
            payload = resp.content
        finally:
            resp.close()

        # create the folder only once there is something to store in it
        os.makedirs(targetdir, exist_ok=True)
        with open(target, 'wb') as fo:
            fo.write(payload)
        return self._DOWNLOADED

    def connect(self):
        """
        Connect to the CHM-15k using the provided password.

        This method sends a validation request to the CHM-15k data server
        with the provided password, parses the session token out of the
        reply and stores it in `sessionid` and as the cookie "session"
        for subsequent requests.

        Failures are reported with a printed message instead of an
        exception; `sessionid` keeps its previous value in that case.
        """
        validationurl = self.__cgiurl + f"?validatetoken&code={self.password}"

        try:
            resp = self.get(validationurl, timeout=self.timeout)
        except requests.exceptions.RequestException:
            print(f'{self._now()}: Connection failed, check url {self.url} and '
                  f'password {self.password}')
            return

        try:
            # the reply is expected to look like {key:token,...}; the value
            # after the first colon is taken as the token
            payload = resp.text.strip().split('{')[1].split('}')[0]
            sessionid = payload.split(':')[1].split(',')[0]
        except IndexError:
            print(f'{self._now()}: Unexpected answer from {self.url}, '
                  'check the password')
            return
        finally:
            resp.close()

        self.sessionid = sessionid
        # the cookie domain must be the bare host, without port or slash
        self.cookies.set("session", self.sessionid,
                         domain=urlsplit(self.url).hostname or '')

        if not self.quiet:
            print(f'{self._now()}: Connection successful to {self.url}')

    def getfilelist(self):
        """
        Get a list of files from the CHM-15k.

        If the connection to the server has not been established,
        this method will establish a connection. Sets attributes of the
        object to contain the return values as well.

        Returns
        -------
        dict or None
            None if the reply could not be parsed as JSON, otherwise a
            dictionary containing the following keys:
            - 'zipfiles': A list of the names of zipped netCDF files.
            - 'netcdffiles': A list of the names of netCDF files.
            - 'zipsizes': A list of the sizes of zipped netCDF files.
            - 'ncsizes': A list of the sizes of netCDF files.
        """
        if not self.sessionid:
            self.connect()

        resp = self.get(self.__cgiurl + '?filelist', timeout=self.timeout)
        body = resp.text
        resp.close()

        # only every second line of the reply, starting with the second
        # one, is kept
        # NOTE(review): presumably the other lines are chunk sizes - confirm
        body = ''.join(body.split('\r\n')[1::2])

        try:
            filelist = json.loads(body)
        except json.JSONDecodeError:
            if not self.quiet:
                now = self._now()
                print(f'{now}: Issue with getting proper filelist, aborting getfilelist and potential callers')
            return None

        zipfiles, zipsizes, ncfiles, ncsizes = [], [], [], []
        for entry in filelist["ncfiles"]:
            name, size = entry[0], entry[1]
            if 'zip' in name:
                zipfiles.append(name)
                zipsizes.append(size)
            else:
                ncfiles.append(name)
                ncsizes.append(size)

        self.filecount = filelist['count']
        self.zipfiles = zipfiles
        self.zipsizes = zipsizes
        self.ncfiles = ncfiles
        self.ncsizes = ncsizes

        if not self.quiet:
            now = self._now()
            print(f'{now}: Found {filelist["count"]} files in total to be checked')
            print(f'{now}: Found {len(self.ncfiles)} netCDF files')
            print(f'{now}: Found {len(self.zipfiles)} zipped netCDF files')

        return {'zipfiles': self.zipfiles, 'netcdffiles': self.ncfiles,
                'zipsizes': self.zipsizes, 'ncsizes': self.ncsizes}

    def getsinglefile(self, filename, overwrite=True):
        """
        Download a single file from the CHM15k to the specified output path.

        Parameters
        ----------
        filename : str
            Name of the file to be downloaded. Can be either zip or nc file.
        overwrite : bool, optional
            Flag indicating whether to overwrite the file if it already
            exists in the output path and has the same size.
            Defaults to True.

        Returns
        -------
        None
            In every case; problems (file not listed on the server, failed
            transfer) are reported with a printed message.

        Notes
        -----
        The file list is fetched first if that has not happened yet. The
        file is saved under the same filename as on the device.
        """
        if not self.filecount:
            self.getfilelist()

        if filename in self.ncfiles:
            filesize = self.ncsizes[self.ncfiles.index(filename)]
        elif filename in self.zipfiles:
            filesize = self.zipsizes[self.zipfiles.index(filename)]
        else:
            print(f'File {filename} not available')
            return

        status = self._fetch(filename, filesize, overwrite)
        if self.quiet:
            return

        if status == self._EXISTS:
            print(f'File {filename} already exists and has the same '
                  'size as the file on the CHM15k. Pass overwrite to '
                  'download anyway')
        elif status == self._FAILED:
            print(f'{self._now()}: Filetransfer failed for {filename}')
        else:
            print(f'{self._now()}: Successfully downloaded {filename}')

    def getncfiles(self, overwrite=False):
        """
        Download netCDF files from the CHM-15k to the specified `outpath`.

        Parameters
        ----------
        overwrite : bool, optional
            Whether to overwrite existing files with the same name and size
            in the `outpath`.
            Default is False.

        Notes
        -----
        This method first checks whether the `filecount` attribute is set.
        If not, it calls the `getfilelist` method to obtain a list of files
        available for download. Then, for each netCDF file in the list,
        it checks whether the file already exists in the `outpath` and has
        the same size as the file.
        If not, it downloads the file using a GET request and saves it
        to the `outpath`. A summary line is printed at the end regardless
        of `quiet`.
        """
        if not self.filecount:
            self.getfilelist()

        total = len(self.ncfiles)
        dlcount = 0
        for fileno, (filename, filesize) \
                in enumerate(zip(self.ncfiles, self.ncsizes), start=1):
            status = self._fetch(filename, filesize, overwrite)
            if status == self._DOWNLOADED:
                dlcount += 1
            if self.quiet:
                continue

            now = self._now()
            progress = round(fileno / total * 100, 1)
            if status == self._EXISTS:
                print(f'Not downloading {filename} as it exists and has the same size')
                print(f'{now}: Progress at ', f'{progress} %')
            elif status == self._FAILED:
                print(f'Filetransfer failed for {filename}')
            else:
                print(f'{now}: Successfully downloaded {filename}, the {dlcount} file')
                print(f'{now}: Progress at {progress} %')

        # files may end up in different date folders, so only the common
        # root is reported
        print(f'{self._now()}: Downloaded all {dlcount} files that contained new data '
              f'to {self.outpath}')

    def getzipfiles(self, overwrite=False):
        """
        Download zip files from the CHM-15k to the specified `outpath`.

        Parameters
        ----------
        overwrite : bool, optional
            Whether to overwrite existing files with the same name and size
            in the `outpath`.
            Default is False.

        Notes
        -----
        This method first checks whether the `filecount` attribute is set.
        If not, it calls the `getfilelist` method to obtain a list of files
        available for download. Then, for each zip file in the list,
        it checks whether the file already exists in the `outpath` and has
        the same size as the file.
        If not, it downloads the file using a GET request and saves it
        to the `outpath`. A summary line is printed at the end regardless
        of `quiet`.
        """
        if not self.filecount:
            self.getfilelist()

        os.makedirs(self.outpath, exist_ok=True)

        total = len(self.zipfiles)
        dlcount = 0
        for fileno, (filename, filesize) \
                in enumerate(zip(self.zipfiles, self.zipsizes), start=1):
            status = self._fetch(filename, filesize, overwrite)
            if status == self._DOWNLOADED:
                dlcount += 1
            if self.quiet:
                continue

            if status == self._EXISTS:
                print('File already exists and has '
                      f'the same size ({filename})')
            elif status == self._FAILED:
                print(f'Filetransfer failed for {filename}')
            else:
                now = self._now()
                print(f'{now}: Successfully downloaded {filename}')
                print(f'{now}: Progress at '
                      f'{round(fileno / total * 100, 1)} %')

        # skipped and failed files are not counted as downloaded
        print(f'{self._now()}: Downloaded {dlcount} of {total} available '
              f'zip files to {self.outpath}')
if __name__ == '__main__':
    # Example usage: list (and optionally download) the files of one device.

    # the url to connect to, either http/s or ip directly of the chm15k
    url = ''

    a = chm15ksession(url,
                      outpath='./',
                      quiet=False)

    # establish a connection, setting up a session, this will be done
    # automatically upon calling other get functions
    a.connect()

    # get the available files in case you want to download only one file
    a.getfilelist()

    # usually, one is interested only in the netcdf files that are available,
    # especially in an operational setting where other files have already
    # been downloaded.
    # per default, existing files are not downloaded again
    # a.getncfiles()

    # zipfiles are created by the device for each month and can be
    # downloaded as well
    # per default, existing files are not downloaded again
    # a.getzipfiles()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment