Skip to content

Instantly share code, notes, and snippets.

@simone-codeluppi
Created November 19, 2020 16:58
Show Gist options
  • Save simone-codeluppi/bfb07e1159a81a38b620a7bb19a7b1cf to your computer and use it in GitHub Desktop.
Save simone-codeluppi/bfb07e1159a81a38b620a7bb19a7b1cf to your computer and use it in GitHub Desktop.
import prefect
from prefect import task, Flow, Parameter, flatten, unmapped
from prefect.engine.executors import DaskExecutor
from prefect.environments import LocalEnvironment
from prefect import Task
from prefect.environments.storage import Local
import os
from pathlib import Path
class create_folder_structure(Task):
    """
    Prefect task that creates the backbone folder structure of an experiment.

    The folders are used to sort the files generated by the machines and to
    save the data created during the processing. The structure is common to
    all analysis.

    FOLDER STRUCTURE
    - original_robofish_logs: contains all the original robofish logs.
    - extra_files: contains the extra files acquired during imaging.
    - extra_processing_data: contains extra files used in the analysis
        like the dark images for flat field correction.
    - pipeline_config: contains all the configuration files.
    - raw_data: contains the renamed .nd2 files and the corresponding
        pickle configuration files. It is the directory that is
        backed up on the server.
    - output_figures: contains the reports and visualizations
    - notebooks: will contain potential notebooks used for processing the data
    - probes: will contain the fasta file with the probes used in the experiment
    - tmp: save temporary data

    Args:
        experiment_fpath: str
            folder path of the experiment
    """

    def run(self, experiment_fpath: str) -> None:
        """
        Create the missing standard sub-folders inside the experiment folder.

        Every folder that does not exist yet is created and its permissions
        are set to 0o777. Folders that already exist are left untouched and
        only reported in the task log.

        Args:
            experiment_fpath: str
                folder path of the experiment; it must already exist.

        Raises:
            FileNotFoundError: if experiment_fpath does not exist.
        """
        experiment_fpath = Path(experiment_fpath)
        folders_list = (
            'raw_data',
            'original_robofish_logs',
            'extra_processing_data',
            'extra_files',
            'pipeline_config',
            'output_figures',
            'notebooks',
            'probes',
            'tmp',
        )

        for folder_name in folders_list:
            folder_path = experiment_fpath / folder_name
            try:
                # Try the creation directly instead of checking for existence
                # first: a separate check leaves a window in which another
                # worker can create the folder and make the mkdir call fail.
                folder_path.mkdir()
            except FileExistsError:
                if folder_path.is_dir():
                    self.logger.info(f'{folder_name} already exist')
                else:
                    # Something that is not a directory occupies the name;
                    # report it instead of silently treating it as a folder.
                    self.logger.warning(
                        f'{folder_name} exists but is not a directory')
            else:
                # The mode given to mkdir is filtered by the process umask,
                # so the permissions are set explicitly after the creation.
                folder_path.chmod(0o777)
# Flow built around the subclassed task.
# The execution environment and the storage are created up front so the
# Flow definition below stays on a single readable line.
dask_environment = LocalEnvironment(
    DaskExecutor(address='tcp://193.10.16.58:32833'))
flow_storage = Local(directory='/home/simone/tmp_code/flows')

with Flow("subclassed", environment=dask_environment,
          storage=flow_storage) as flow:
    # Experiment folder is exposed as a flow parameter with a test default.
    experiment_fpath = Parameter(
        'experiment_fpath', default='/wsfish/smfish_ssd/test')
    create_folders = create_folder_structure()
    folders = create_folders(experiment_fpath)

# Register the flow under the "test" project.
flow.register(project_name="test")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment