Created
February 20, 2024 10:32
-
-
Save giorgiopizz/d10dd7f7ef722fa1cc24518ce5f15ea1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Main script for the creation of shapes, starting from a configuration folder. | |
It gives the option to compile the configuration folder and save it as both JSON and pickle file. | |
It also gives the option to run the analysis in batch mode, or to check for errors in the batch submission. | |
The analysis can be run in batch mode or locally. | |
If run in batch mode it gives the ability to merge the output root files. | |
""" | |
import sys | |
from pathlib import Path | |
import argparse | |
import os | |
import glob | |
import subprocess | |
import ROOT | |
ROOT.gROOT.SetBatch(True) | |
headersPath = os.path.dirname(os.path.dirname(__file__)) + "/include/headers.hh" | |
ROOT.gInterpreter.Declare(f'#include "{headersPath}"') | |
import numpy as np | |
# def hist2array(h, overflow=False, copy=True): | |
# shift = 0 | |
# if overflow: | |
# shift = 1 | |
# d = [] | |
# for i in range(1 - shift, h.GetNbinsX() + 1 + shift): | |
# d.append(h.GetBinContent(i)) | |
# d = np.array(d) | |
# return d | |
def hist2array(h, flow=False, copy=True, include_sumw2=False):
    """Convert a ROOT 1D histogram into numpy array(s) of bin contents.

    Parameters
    ----------
    h : ROOT.TH1D | ROOT.TH1F | ROOT.TH1I
        source histogram
    flow : bool, optional, default: False
        if True keep the underflow/overflow bins, otherwise strip them
    copy : bool, optional, default: True
        if True return copies detached from the histogram's internal buffers
    include_sumw2 : bool, optional, default: False
        if True also return the array of summed squared weights

    Returns
    -------
    numpy.ndarray or (numpy.ndarray, numpy.ndarray)
        bin contents (and sumw2 when requested)
    """
    nx = h.GetNbinsX() + 2  # includes underflow (bin 0) and overflow (nbins+1)
    # dtype must match the C type of the histogram's internal buffer
    # (np.float / np.int aliases were removed in NumPy 1.24; np.float was
    # float64 anyway, which would misread TH1F's Float_t buffer)
    if isinstance(h, ROOT.TH1D):
        dtype = np.double   # Double_t
    elif isinstance(h, ROOT.TH1F):
        dtype = np.float32  # Float_t
    elif isinstance(h, ROOT.TH1I):
        dtype = np.int32    # Int_t
    else:
        print('Histogram of type', h, 'is not supported', file=sys.stderr)
        sys.exit(1)
    vals = np.ndarray((nx, ), dtype=dtype, buffer=h.GetArray())
    # GetSumw2 is a TArrayD, i.e. always double regardless of the histogram type
    sumw2 = np.ndarray((nx, ), dtype=np.double, buffer=h.GetSumw2().GetArray())
    shift = 0 if flow else 1
    vals = vals[shift: nx-shift]
    sumw2 = sumw2[shift: nx-shift]  # was assigned to 'sum2', leaving sumw2 unsliced
    if copy:
        vals = vals.copy()
        sumw2 = sumw2.copy()
    if include_sumw2:
        return vals, sumw2
    return vals
def defaultParser():
    """Build the command-line argument parser for the shapes-making script.

    Returns
    -------
    argparse.ArgumentParser
        parser exposing compile mode, operation mode, batch flags, paths,
        event limit and resubmission options used by ``main``
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--compile",
        type=int,
        choices=[0, 1],
        help="0 compile configuration and save JSON, 1 load compiled configuration",
        required=False,
        default=0,
    )
    parser.add_argument(
        "-o",
        "--operationMode",
        type=int,
        choices=[0, 1, 2],
        help="0 do analysis in batch, 1 check for errors in batch submission, "
        "2 hadd root files",
        required=False,
        default=-1,  # sentinel: no operation selected
    )
    parser.add_argument(
        "-b",
        "--doBatch",
        # was untyped (string values); int with choices validates the input
        # and is backward-compatible since main() applies int() anyway
        type=int,
        choices=[0, 1],
        help="0 (default) runs on local, 1 runs with condor",
        required=False,
        default=0,
    )
    parser.add_argument(
        "-dR",
        "--dryRun",
        choices=[0, 1],
        type=int,
        help="1 do not submit to condor",
        required=False,
        default=0,
    )
    parser.add_argument(
        "-f", "--folder", help="Path to folder", required=False, default="."
    )
    parser.add_argument(
        "-configs",
        "--configsFolder",
        help="Path to configurations folder",
        required=False,
        default="configs",
    )
    parser.add_argument(
        "-config",
        "--configFile",
        help="Path to configuration JSON file to load",
        required=False,
        default="latest",
    )
    parser.add_argument(
        "-l",
        "--limitEvents",
        type=int,
        help="Max number of events",
        required=False,
        default=-1,
    )
    parser.add_argument(
        "-r",
        "--resubmit",
        choices=[0, 1, 2],
        type=int,
        help="Resubmit jobs, 1 resubmit finished jobs with errors, 2 resubmit running jobs",
        required=False,
        default=0,  # was the string "0", which argparse had to re-parse
    )
    return parser
def main():
    """Entry point: parse CLI options, compile or load the configuration,
    then dispatch on ``operationMode``: 0 = run the analysis (condor or
    local), 1 = check batch jobs for errors / resubmit, 2 = hadd the
    per-job root files and post-process envelope/rms/square nuisances.

    NOTE(review): relies heavily on module-level globals injected by
    ConfigLib from the configuration (``samples``, ``nuisances``,
    ``variables``, ``cuts``, ``aliases``, ``lumi``, ``tag``,
    ``outputFile``, ``outputFolder``, ``batchFolder``, ``runnerFile``,
    ``batchVars``, ``filesToExec``, ``imports``, ``varsToKeep``).
    """
    parser = defaultParser()
    args = parser.parse_args()
    compileFolder = args.compile
    operationMode = args.operationMode
    doBatch = int(args.doBatch)
    dryRun = int(args.dryRun)
    # folder is promoted to a global so the executed configuration files can use it
    global folder
    folder = os.path.abspath(args.folder)
    configsFolder = os.path.abspath(args.folder + "/" + args.configsFolder)
    configFile = args.configFile
    resubmit = int(args.resubmit)
    # these are expected to be (re)defined by the loaded configuration
    global batchFolder
    global outputFolder
    if compileFolder == 1:
        # compile the configuration folder and dump it as pickle + JSON
        print(os.getcwd())
        print(os.path.abspath(f"{folder}/configuration.py"))
        if not os.path.exists(folder) or not os.path.exists(
            f"{folder}/configuration.py"
        ):
            print("Error, configuration folder does not exists!")
            sys.exit()
        from mkShapesRDF.shapeAnalysis.ConfigLib import ConfigLib
        # variables before execution of files
        # NOTE(review): configVars1/configVars2 are never used afterwards
        configVars1 = dict(list(globals().items()) + list(locals().items()))
        # execute configuration.py into globals(), then the files it lists
        ConfigLib.loadConfig(["configuration.py"], globals())
        ConfigLib.loadConfig(filesToExec, globals(), imports)
        globals()["varsToKeep"].insert(0, "folder")
        d = ConfigLib.createConfigDict(
            varsToKeep, dict(list(globals().items()) + list(locals().items()))
        )
        # variables after execution of files
        configVars2 = dict(list(globals().items()) + list(locals().items()))
        import datetime
        dt = datetime.datetime.now()
        Path(configsFolder).mkdir(parents=True, exist_ok=True)
        # timestamped pickle plus a fixed-name JSON copy
        fileName = configsFolder + "/config_" + dt.strftime("%y-%m-%d_%H_%M_%S")
        fileNameJson = configsFolder + "/config"
        ConfigLib.dumpConfigDict(d, fileName)
        ConfigLib.dumpConfigDict(d, fileNameJson, doJson=True)
        ConfigLib.loadDict(d, globals())
    else:
        # load a previously compiled configuration (pickle) into globals()
        from mkShapesRDF.shapeAnalysis.ConfigLib import ConfigLib
        if configFile != "latest":
            p = os.path.abspath(configFile)
            if not os.path.exists(p):
                print("Config file", configFile, "doest not exists at path", p)
                sys.exit()
            else:
                d = ConfigLib.loadPickle(p, globals())
        else:
            d = ConfigLib.loadLatestPickle(configsFolder, globals())
    print(samples.keys())
    print(d.keys())
    print("\n\n", batchVars, "\n\n")
    # rebind the global batchFolder to its absolute location under folder
    batchFolder = f"{folder}/{batchFolder}"
    Path(f"{folder}/{outputFolder}").mkdir(parents=True, exist_ok=True)
    # NOTE(review): the "output already exists" guard for merge mode is disabled
    if operationMode == 2 and os.path.exists(f"{folder}/{outputFolder}/{outputFile}"):
        pass
        # print("Can't merge files, output already exists")
        # print(f"You can run: rm {folder}/{outputFolder}/{outputFile}")
        # sys.exit()
    limit = int(args.limitEvents)
    # PROCESSING
    if runnerFile == "default":
        runnerPath = os.path.realpath(os.path.dirname(__file__)) + "/runner.py"
    else:
        runnerPath = f"{folder}/{runnerFile}"
    print("\n\nRunner path: ", runnerPath, "\n\n")
    if not os.path.exists(runnerPath):
        print("Runner file / path does not exist!")
        sys.exit()
    # NOTE(review): _results is never used
    _results = {}
    # make the runner importable regardless of where it lives
    sys.path.append(os.path.dirname(runnerPath))
    from runner import RunAnalysis
    if operationMode == 0:
        print("#" * 20, "\n\n", " Doing analysis", "\n\n", "#" * 20)
        if doBatch == 1:
            print("#" * 20, "\n\n", " Running on condor ", "\n\n", "#" * 20)
            _samples = RunAnalysis.splitSamples(samples)
            from mkShapesRDF.shapeAnalysis.BatchSubmission import BatchSubmission
            outputPath = os.path.abspath(outputFolder)
            batch = BatchSubmission(
                outputPath,
                batchFolder,
                headersPath,
                runnerPath,
                tag,
                _samples,
                d,
                batchVars,
            )
            batch.createBatches()
            batch.submit(dryRun)
        else:
            print("#" * 20, "\n\n", " Running on local machine ", "\n\n", "#" * 20)
            outputFileMap = f"{folder}/{outputFolder}/{outputFile}"
            # do not split by FilesPerJob when running locally
            _samples = RunAnalysis.splitSamples(samples, False)
            print(len(_samples))
            # declared global here, but the declaration applies to the whole
            # function: every use of ``cuts`` in main refers to the global
            global cuts
            runner = RunAnalysis(
                _samples,
                aliases,
                variables,
                cuts,
                nuisances,
                lumi,
                limit,
                outputFileMap,
            )
            runner.run()
    elif operationMode == 1:
        # a job is considered finished once its err.txt exists
        errs = glob.glob("{}/{}/*/err.txt".format(batchFolder, tag))
        files = glob.glob("{}/{}/*/script.py".format(batchFolder, tag))
        errsD = list(map(lambda k: "/".join(k.split("/")[:-1]), errs))
        filesD = list(map(lambda k: "/".join(k.split("/")[:-1]), files))
        # print(files)
        notFinished = list(set(filesD).difference(set(errsD)))
        print(notFinished)
        tabulated = []
        tabulated.append(["Total jobs", "Finished jobs", "Running jobs"])
        tabulated.append([len(files), len(errs), len(notFinished)])
        import tabulate
        print(tabulate.tabulate(tabulated))
        # print('queue 1 Folder in ' + ' '.join(list(map(lambda k: k.split('/')[-1], notFinished))))
        # known-benign messages that may appear in err.txt without meaning failure
        normalErrs = """Warning in <TClass::Init>: no dictionary for class edm::ProcessHistory is available
Warning in <TClass::Init>: no dictionary for class edm::ProcessConfiguration is available
Warning in <TClass::Init>: no dictionary for class edm::ParameterSetBlob is available
Warning in <TClass::Init>: no dictionary for class edm::Hash<1> is available
Warning in <TClass::Init>: no dictionary for class pair<edm::Hash<1>,edm::ParameterSetBlob> is available
Warning in <TInterpreter::ReadRootmapFile>: class podio::
TClass::Init:0: RuntimeWarning:
real
user
sys
run locally
No TTree was created for
Warning in <Snapshot>: A lazy Snapshot action was booked but never triggered.
cling::DynamicLibraryManager::loadLibrary(): libOpenGL.so.0: cannot open shared object file: No such file or directory
Error in <AutoloadLibraryMU>: Failed to load library /cvmfs/sft.cern.ch/lcg/releases/ROOT/6.28.00
cling::DynamicLibraryManager::loadLibrary(): libGLU.so.1: cannot open shared object file: No such file or directory
Error in <TNetXNGFile::Close>:
Warning in <TChain::AddFile>: Adding tree with no entries from file
Plugin No such file or directory loading sec.protocol libXrdSecztn.so
"""
        normalErrs = normalErrs.split("\n")
        normalErrs = list(map(lambda k: k.strip(" ").strip("\t"), normalErrs))
        normalErrs = list(filter(lambda k: k != "", normalErrs))
        toResubmit = []
        def normalErrsF(k):
            # True when line ``k`` matches one of the known benign messages
            for s in normalErrs:
                if s in k:
                    return True
                elif k.startswith(s):
                    return True
            return False
        for err in errs:
            try:
                with open(err) as file:
                    lines = file.read()
            except:
                # unreadable err.txt: treat the job as failed
                print('Could not open error file for', err, 'continue')
                toResubmit.append(err)
                continue
            txt = lines.split("\n")
            # txt = list(filter(lambda k: k not in normalErrs, txt))
            txt = list(filter(lambda k: not normalErrsF(k), txt))
            txt = list(filter(lambda k: k.strip() != "", txt))
            if len(txt) > 0:
                # anything left after filtering is an unexpected error
                print("Found unusual error in")
                print(err)
                print("\n")
                print("\n".join(txt[:5] + ['\n...\n'] + txt[-5:]))
                print("\n\n")
                toResubmit.append(err)
        # keep only the job folder name (second-to-last path component)
        toResubmit = list(map(lambda k: "".join(k.split("/")[-2]), toResubmit))
        print(toResubmit)
        if len(toResubmit) > 0:
            print("\n\nShould resubmit the following files\n")
            print(
                "queue 1 Folder in "
                + " ".join(list(map(lambda k: k.split("/")[-1], toResubmit)))
            )
            if resubmit == 1:
                from mkShapesRDF.shapeAnalysis.BatchSubmission import BatchSubmission
                BatchSubmission.resubmitJobs(batchFolder, tag, toResubmit, dryRun)
        if resubmit == 2:
            # resubmit all the jobs that are not finished
            toResubmit = list(map(lambda k: "".join(k.split("/")[-1]), notFinished))
            print(toResubmit)
            from mkShapesRDF.shapeAnalysis.BatchSubmission import BatchSubmission
            BatchSubmission.resubmitJobs(batchFolder, tag, toResubmit, dryRun)
    elif operationMode == 2:
        print(
            "",
            "".join(["#" for _ in range(20)]),
            "\n\n",
            "Merging root files",
            "\n\n",
            "".join(["#" for _ in range(20)]),
        )
        _samples = RunAnalysis.splitSamples(samples)
        print(_samples)
        # per-job output files are named <sampleName>_<chunkIndex>
        __samples = list(map(lambda k: k[0] + '_' + str(k[3]), _samples))
        #__samples = list(set(__samples).difference(['DY_23', 'DY_19']) )
        print(len(__samples))
        # output file name without its extension
        outputFileTrunc = ".".join(outputFile.split(".")[:-1])
        filesToMerge = list(
            map(
                #lambda k: f"{folder}/{outputFolder}/{outputFileTrunc}__ALL__{k[0]}_{str(k[3])}.root",
                lambda k: f"{folder}/{outputFolder}/{outputFileTrunc}__ALL__{k}.root",
                __samples,
            )
        )
        print("\n\nMerging files\n\n")
        print("\n\n", filesToMerge, "\n\n")
        print(f"Hadding files into {folder}/{outputFolder}/{outputFile}")
        ret = subprocess.run(
            f'hadd -j {folder}/{outputFolder}/{outputFile} {" ".join(filesToMerge)}',
            shell=True,
        )
        # if True:
        if ret.returncode == 0:
            # ok = True
            # if ok:
            print("Hadd was successful")
            import mkShapesRDF.shapeAnalysis.latinos.LatinosUtils as utils
            # NOTE(review): opens "{outputFolder}/{outputFile}" while hadd wrote
            # to "{folder}/{outputFolder}/{outputFile}" — equivalent only when
            # running from inside ``folder``; verify
            f = ROOT.TFile.Open(f"{outputFolder}/{outputFile}", "update")
            # post process -> nuisance envelops and RMS
            # ``cuts`` is global (declared above); rebind it to the inner dict
            cuts = cuts['cuts']
            categoriesmap = utils.flatten_cuts(cuts)
            for nuisance in nuisances.keys():
                # only envelope / rms / square nuisances need post-processing
                if not (
                    nuisances[nuisance].get("kind", "").endswith("envelope")
                    or nuisances[nuisance].get("kind", "").endswith("rms")
                    or nuisances[nuisance].get("kind", "").endswith("square")
                ):
                    continue
                print('work for ', nuisance)
                #categoriesmap = utils.flatten_cuts(cuts)
                subsamplesmap = utils.flatten_samples(samples)
                utils.update_variables_with_categories(variables, categoriesmap)
                utils.update_nuisances_with_subsamples(nuisances, subsamplesmap)
                utils.update_nuisances_with_categories(nuisances, categoriesmap)
                # sys.exit()
                _cuts = list(cuts.keys())
                _samples = list(samples.keys())
                # print(cuts['cuts'])
                # print(variables)
                # print(utils.flatten_cuts(cuts['cuts']))
                for cut in _cuts:
                    # print('work in', cut)
                    # ROOT.gDirectory.cd(f"/{cut}")
                    # for variable in [k.GetName() for k in ROOT.gDirectory.GetListOfKeys()]:
                    for variable in variables.keys():
                        f.cd(f"/{cut}/{variable}")
                        print('work in ', cut, variable)
                        histos = [k.GetName() for k in ROOT.gDirectory.GetListOfKeys()]
                        for sampleName in _samples:
                            # skip samples this nuisance does not apply to
                            limitSamples = nuisances[nuisance].get('samples', {})
                            if not (len(limitSamples.keys()) == 0 or sampleName in limitSamples.keys()):
                                # print(f'{sampleName} does not have nuisance {nuisance}')
                                continue
                            histosNameToProcess = list(
                                filter(
                                    lambda k: k.startswith(
                                        f"histo_{sampleName}_{nuisances[nuisance]['name']}_SPECIAL_NUIS"
                                    ),
                                    histos,
                                )
                            )
                            histosToProcess = list(
                                map(
                                    lambda k: ROOT.gDirectory.Get(k).Clone(),
                                    histosNameToProcess,
                                )
                            )
                            if len(histosToProcess) == 0:
                                print(f'No variations found for {sampleName} in {cut}/{variable} for nuisance {nuisances[nuisance]["name"]}', file=sys.stderr,)
                                continue
                                # NOTE(review): unreachable — the continue above always fires
                                sys.exit(1)
                            hNominal = ROOT.gDirectory.Get(
                                f"histo_{sampleName}"
                            ).Clone()
                            hName = f"histo_{sampleName}_{nuisances[nuisance]['name']}"
                            h_up = histosToProcess[0].Clone()
                            h_do = histosToProcess[0].Clone()
                            # one row per variation; columns include flow bins
                            variations = np.empty(
                                (
                                    len(histosToProcess),
                                    histosToProcess[0].GetNbinsX() + 2,
                                ),
                                dtype=float,
                            )
                            for i in range(len(histosToProcess)):
                                variations[i, :] = hist2array(
                                    histosToProcess[i], flow=True
                                )
                            arrup = 0
                            arrdo = 0
                            if nuisances[nuisance]['kind'].endswith("envelope"):
                                # bin-by-bin max/min over all variations
                                arrup = np.max(variations, axis=0)
                                arrdo = np.min(variations, axis=0)
                            elif nuisances[nuisance]['kind'].endswith("rms"):
                                # nominal +/- RMS of the variations
                                vnominal = hist2array(hNominal, flow=True)
                                arrnom = np.tile(vnominal, (variations.shape[0], 1))
                                arrv = np.sqrt(
                                    np.mean(np.square(variations - arrnom), axis=0)
                                )
                                arrup = vnominal + arrv
                                arrdo = vnominal - arrv
                            elif nuisances[nuisance]['kind'].endswith("square"):
                                # nominal +/- quadrature sum of the deviations
                                vnominal = hist2array(hNominal, flow=True)
                                arrnom = np.tile(vnominal, (variations.shape[0], 1))
                                # NOTE(review): up_is_up / zeros are computed but
                                # unused (the np.where variant below is disabled)
                                up_is_up = variations>arrnom
                                zeros = np.zeros_like(arrnom)
                                arrv = np.sqrt(
                                    np.sum(np.square(variations - arrnom), axis=0)
                                )
                                arrup = vnominal + arrv
                                arrdo = vnominal - arrv
                                # arrup = np.where(up_is_up, vnominal + arrv, vnominal - arrv)
                                # arrdo = np.where(~up_is_up, vnominal - arrv, vnominal + arrv)
                            else:
                                continue
                            print(arrup)
                            print(arrdo)
                            # write the combined variation back, flow bins included
                            for i in range(0, h_up.GetNbinsX() + 2):
                                h_up.SetBinContent(i, arrup[i])
                                h_do.SetBinContent(i, arrdo[i])
                            print(hName)
                            h_up.SetName(hName + "Up")
                            h_up.Write()
                            h_do.SetName(hName + "Down")
                            h_do.Write()
                            # drop the individual variation histograms (all cycles)
                            for histo in histosNameToProcess:
                                ROOT.gDirectory.Delete(f"{histo};*")
            f.Close()
    else:
        print("Operating mode was set to -1, nothing was done")
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from copy import deepcopy | |
import sys | |
import ROOT | |
from array import array | |
from mkShapesRDF.lib.parse_cpp import ParseCpp | |
import os | |
from math import sqrt | |
ROOT.gROOT.SetBatch(True) | |
ROOT.TH1.SetDefaultSumw2(True) | |
import numpy as np | |
# def hist2array(h, overflow=False, copy=True): | |
# shift = 0 | |
# if overflow: | |
# shift = 1 | |
# d = [] | |
# for i in range(1 - shift, h.GetNbinsX() + 1 + shift): | |
# d.append(h.GetBinContent(i)) | |
# d = np.array(d) | |
# return d | |
def hist2array(h, flow=False, copy=True, include_sumw2=True):
    """Convert a ROOT 1D histogram into numpy array(s) of bin contents.

    Parameters
    ----------
    h : ROOT.TH1D | ROOT.TH1F | ROOT.TH1I
        source histogram
    flow : bool, optional, default: False
        if True strip the underflow/overflow bins; if False keep the full
        ``nbins + 2`` array.
        NOTE(review): this is the opposite convention of the ``hist2array``
        in the shapes-making script (there ``flow=True`` keeps the flow
        bins) — confirm which behaviour callers of this module expect.
    copy : bool, optional, default: True
        if True return copies detached from the histogram's internal buffers
    include_sumw2 : bool, optional, default: True
        if True also return the array of summed squared weights

    Returns
    -------
    numpy.ndarray or (numpy.ndarray, numpy.ndarray)
        bin contents (and sumw2 when requested)
    """
    nx = h.GetNbinsX() + 2  # includes underflow (bin 0) and overflow (nbins+1)
    # dtype must match the C type of the histogram's internal buffer
    # (np.float / np.int aliases were removed in NumPy 1.24; np.float was
    # float64 anyway, which would misread TH1F's Float_t buffer)
    if isinstance(h, ROOT.TH1D):
        dtype = np.double   # Double_t
    elif isinstance(h, ROOT.TH1F):
        dtype = np.float32  # Float_t
    elif isinstance(h, ROOT.TH1I):
        dtype = np.int32    # Int_t
    else:
        print('Histogram of type', h, 'is not supported', file=sys.stderr)
        sys.exit(1)
    vals = np.ndarray((nx, ), dtype=dtype, buffer=h.GetArray())
    # GetSumw2 is a TArrayD, i.e. always double regardless of the histogram type
    sumw2 = np.ndarray((nx, ), dtype=np.double, buffer=h.GetSumw2().GetArray())
    shift = 1 if flow else 0
    vals = vals[shift: nx-shift]
    sumw2 = sumw2[shift: nx-shift]  # was assigned to 'sum2', leaving sumw2 unsliced
    if copy:
        vals = vals.copy()
        sumw2 = sumw2.copy()
    if include_sumw2:
        return vals, sumw2
    return vals
def _fold(_h, _to, _from):
    """Fold bin ``_from`` into bin ``_to`` of histogram ``_h`` in place.

    The content of the source bin is added to the destination bin, the
    corresponding sum-of-squared-weights entries are combined the same way,
    and the source bin (content and sumw2) is then zeroed.
    """
    mergedContent = _h.GetBinContent(_to) + _h.GetBinContent(_from)
    _h.SetBinContent(_to, mergedContent)
    sumw2 = _h.GetSumw2()
    sumw2[_to] = sumw2[_to] + sumw2[_from]
    _h.SetBinContent(_from, 0.0)
    sumw2[_from] = 0.0
class RunAnalysis: | |
    r"""Class that creates ``dfs`` and runs the analysis"""
@staticmethod | |
def splitSamples(samples, useFilesPerJob=True): | |
r"""static methods, takes a dictionary of samples and split them based on their weights and max num. of files | |
Parameters | |
---------- | |
samples : dict | |
dictionary of samples | |
useFilesPerJob : bool, optional, default: True | |
if you want to further split the samples based on max num. of files. | |
Returns | |
------- | |
`list of tuple` | |
each tuple will have a lenght of 5 (6 if subsamples are present), where the first element is the name of the sample, the second the list of files, the third the weight, and the fourth the index of this tuple compared to the other tuples of the same sample type, the fifth will be the isData flag (True if the sample is data, False otherwise). If subsamples are present, the sixth element will be the dict of subsamples | |
""" | |
# will contain all the different samples splitted based on their weights and max num. of files | |
splittedSamples = [] | |
for sampleName in list(samples.keys()): | |
types = ( | |
{} | |
) # this will contain the the different type of files with their special weights for this sampleName | |
# recall that samples[sampleName]['name'] contain a list of tuples, where a single tuple can have a size of 2 or 3: | |
# first position for name of the process (DYJetsToLL_M-50), second list of files, third a possible special weight | |
for filesType in samples[sampleName]["name"]: | |
# no special weight (will use '1'), and at least one file found | |
if len(filesType) == 2 and len(filesType[1]) > 0: | |
if "1" not in list(types.keys()): | |
types["1"] = [] | |
types["1"].append(filesType + ("1.0",)) | |
elif len(filesType) == 3 and len(filesType[1]) > 0: | |
if filesType[2] not in list(types.keys()): | |
types[filesType[2]] = [] | |
types[filesType[2]].append(filesType) | |
else: | |
print("Error", sampleName, filesType, file=sys.stderr) | |
print( | |
"Either the sample proc you specified has no files, or the weight had problems", | |
file=sys.stderr, | |
) | |
sys.exit() | |
i = 0 | |
for _type in list(types.keys()): | |
# split files based on FilesPerJob or include them all | |
__files = list(map(lambda k: k[1], types[_type])) | |
# flatted list of files | |
__files = [item for sublist in __files for item in sublist] | |
if useFilesPerJob: | |
dim = samples[sampleName].get("FilesPerJob", len(__files)) | |
else: | |
dim = len(__files) | |
__files = [__files[j : j + dim] for j in range(0, len(__files), dim)] | |
for ___files in __files: | |
# the weights for these files will be the product of the weight inside this sampele (i.e. samples[sampleName]['weight']) | |
# and the product of the special weights that is in common to all of those files (i.e. sampleType[0]) | |
# the common special weight can be retrived from the first of the list of files with this weight | |
# remember that the tuple has always size 3 now, the last position is for the special weight | |
weight = ( | |
"( " | |
+ samples[sampleName]["weight"] | |
+ " ) * ( " | |
+ types[_type][0][2] | |
+ " )" | |
) | |
isData = samples[sampleName].get("isData", False) | |
sampleType = (sampleName, ___files, weight, i, isData) | |
if "subsamples" in list(samples[sampleName].keys()): | |
sampleType += (samples[sampleName]["subsamples"],) | |
splittedSamples.append(sampleType) | |
i += 1 | |
return splittedSamples | |
@staticmethod | |
def getTTreeNomAndFriends(fnom, friends): | |
"""Create a TChain with the nominal files and the friends files (nuisances TTrees with varied branches) | |
Args: | |
fnom (list): list of nominal files | |
friends (list of list): list of list of friends files | |
Returns: | |
TChain: TChain with nominal files and friends files | |
""" | |
tnom = ROOT.TChain("Events") | |
for f in fnom: | |
tnom.Add(f) | |
for friend in friends: | |
_tfriend = ROOT.TChain("Events") | |
for f in friend: | |
_tfriend.Add(f) | |
tnom.AddFriend(_tfriend) | |
return tnom | |
@staticmethod | |
def getNuisanceFiles(nuisance, files): | |
"""Searches in the provided nuisance folder for the files with the same name of the nominal files | |
Args: | |
nuisance (dict): dict with the nuisance information | |
files (list): list of nominal files | |
Returns: | |
list of list: list with the down and up varied list of files | |
""" | |
_files = list(map(lambda k: k.split("/")[-1], files)) | |
nuisanceFilesDown = list( | |
map(lambda k: nuisance["folderDown"] + "/" + k, _files) | |
) | |
nuisanceFilesUp = list(map(lambda k: nuisance["folderUp"] + "/" + k, _files)) | |
for nuisFile in nuisanceFilesUp + nuisanceFilesDown: | |
# print(nuisFile) | |
if not os.path.exists(nuisFile): | |
print("File", nuisFile, "does not exist", file=sys.stderr) | |
sys.exit(1) | |
return [nuisanceFilesDown, nuisanceFilesUp] | |
@staticmethod | |
def index_sub(string, sub): | |
try: | |
return string.index(sub) | |
except ValueError: | |
return -10000 | |
    def __init__(
        self,
        samples,
        aliases,
        variables,
        cuts,
        nuisances,
        lumi,
        limit=-1,
        outputFileMap="output.root",
    ):
        r"""
        Stores arguments in the class attributes and creates all the RDataFrame objects

        Parameters
        ----------
        samples : `list of tuple`
            same type as the return of the splitSamples method
        aliases : dict
            dict of aliases
        variables : dict
            dict of variables
        cuts : dict
            dict of cuts, contains two keys (preselections: str, cuts: dict)
        nuisances : dict
            dict of nuisances
        lumi : float
            lumi in fb-1
        limit : int, optional, default: -1
            limit of events to be processed
        outputFileMap : str, optional, default: 'output.root'
            full path + filename of the output root file.

        Returns
        -------
        None
        """
        self.samples = samples
        self.aliases = aliases
        self.variables = variables
        self.preselections = cuts["preselections"]
        # flatten the cut dict: a cut with categories becomes one entry per
        # category, named "<cut>_<cat>", with the ANDed expression
        mergedCuts = {}
        for cut in list(cuts["cuts"].keys()):
            __cutExpr = ""
            if type(cuts["cuts"][cut]) == dict:
                __cutExpr = cuts["cuts"][cut]["expr"]
                for cat in list(cuts["cuts"][cut]["categories"].keys()):
                    mergedCuts[cut + "_" + cat] = {"parent": cut}
                    mergedCuts[cut + "_" + cat]["expr"] = (
                        __cutExpr + " && " + cuts["cuts"][cut]["categories"][cat]
                    )
            elif type(cuts["cuts"][cut]) == str:
                # plain string cut: kept as-is, parent is itself
                __cutExpr = cuts["cuts"][cut]
                mergedCuts[cut] = {"expr": __cutExpr, "parent": cut}
        self.cuts = mergedCuts
        self.nuisances = nuisances
        self.lumi = lumi
        self.limit = limit
        self.outputFileMap = outputFileMap
        self.remappedVariables = {}
        self.dfs = {}
        r"""
        dfs is a dictionary containing as keys the sampleNames. The structure should look like this:

        .. code-block:: python

            dfs = {
                'DY':{
                    0: {
                        'df': obj,
                        'columnNames': [...],
                        'usedVariables': [...],
                        'ttree': obj2, # needed otherwise seg fault in root
                    },
                }
            }
        """
        # collect every branch name referenced by preselections, cuts and
        # variables, so that unused branches/variations can be skipped later
        usedVariables = set()
        usedVariables = usedVariables.union(
            ParseCpp.listOfVariables(ParseCpp.parse(self.preselections))
        )
        for cut in mergedCuts.keys():
            usedVariables = usedVariables.union(
                ParseCpp.listOfVariables(ParseCpp.parse(mergedCuts[cut]["expr"]))
            )
        for var in variables.keys():
            if "name" in variables[var].keys():
                # "name" may hold several ':'-separated expressions (e.g. 2D)
                __var = variables[var]["name"].split(":")
                __vars = list(
                    map(lambda k: ParseCpp.listOfVariables(ParseCpp.parse(k)), __var)
                )
                for __v in __vars:
                    usedVariables = usedVariables.union(__v)
            elif "tree" in variables[var].keys():
                # "tree" variables map output branch -> expression
                for branch in variables[var]["tree"].keys():
                    usedVariables = usedVariables.union(
                        ParseCpp.listOfVariables(
                            ParseCpp.parse(variables[var]["tree"][branch])
                        )
                    )
            else:
                print("Cannot process variable ", var, " nuisances might be faulty")
        # sample here is a tuple, first el is the sampleName, second list of files,
        # third the special weight, forth is the index of tuple for the same sample,
        # fifth if present the dict of subsamples
        for sample in samples:
            files = sample[1]
            sampleName = sample[0]
            friendsFiles = []
            usedFolders = []
            # attach friend trees for every suffix-type shape nuisance that
            # applies to this sample (each varied folder only once)
            for nuisance in self.nuisances.values():
                if sampleName not in nuisance.get("samples", {sampleName: []}):
                    continue
                if nuisance.get("type", "") == "shape":
                    if nuisance.get("kind", "") == "suffix":
                        if nuisance.get("folderUp", "") != "":
                            if nuisance["folderUp"] in usedFolders:
                                continue
                            usedFolders.append(nuisance["folderUp"])
                            friendsFiles += RunAnalysis.getNuisanceFiles(
                                nuisance, files
                            )
            tnom = RunAnalysis.getTTreeNomAndFriends(files, friendsFiles)
            if limit != -1:
                df = ROOT.RDataFrame(tnom)
                # cap the number of processed events
                df = df.Range(limit)
            else:
                #ROOT.EnableImplicitMT()
                df = ROOT.RDataFrame(tnom)
            if sampleName not in self.dfs.keys():
                self.dfs[sample[0]] = {}
            # keyed by the chunk index (sample[3]); the TChain is stored too,
            # otherwise ROOT garbage-collects it and the df segfaults
            self.dfs[sampleName][sample[3]] = {
                "df": df,
                "ttree": tnom,
                "usedVariables": list(usedVariables),
            }
            self.dfs[sampleName][sample[3]]["columnNames"] = list(
                map(lambda k: str(k), df.GetColumnNames())
            )
        self.definedAliases = {}
        print("\n\nLoaded dataframes\n\n")
    def loadAliases(self, afterNuis=False):
        """
        Load aliases in the dataframes. It does not create the special alias ``weight`` for which a special method is used.

        Builds, per dataframe, a chained ``.Define(...)`` expression string and
        evaluates it with ``eval`` (the string starts with the local name
        ``df``, which must therefore stay in scope).

        Parameters
        ----------
        afterNuis : bool, optional, default: False
            if True, only aliases with the key ``afterNuis`` set to True will be loaded
        """
        aliases = self.aliases
        for sampleName in self.dfs.keys():
            for index in self.dfs[sampleName].keys():
                # ``df`` is referenced by the eval'd define_string below
                df = self.dfs[sampleName][index]["df"]
                define_string = "df"
                usedVariables = set(self.dfs[sampleName][index]["usedVariables"])
                for alias in list(aliases.keys()):
                    # only load aliases needed for nuisances!
                    # if beforeNuis
                    if (afterNuis) != (aliases[alias].get("afterNuis", False)):
                        # alias belongs to the other phase: skip it, but still
                        # record its variables as used
                        if "expr" in aliases[alias]:
                            usedVariables = usedVariables.union(
                                ParseCpp.listOfVariables(
                                    ParseCpp.parse(aliases[alias]["expr"])
                                )
                            )
                        continue
                    # alias restricted to a subset of samples
                    if "samples" in list(aliases[alias]):
                        if sampleName not in aliases[alias]["samples"]:
                            continue
                    if alias in self.dfs[sampleName][index]["columnNames"]:
                        if alias != aliases[alias].get("expr", ""):
                            print(
                                f"Alias {alias} cannot be defined, column with that name already exists"
                            )
                            sys.exit()
                        else:
                            # since the alias key is equal to expr there's no need to keep the alias
                            del self.aliases[alias]
                            continue
                    for line in aliases[alias].get("linesToAdd", []):
                        # RPLME_nThreads is used to set the number of threads
                        ROOT.gInterpreter.Declare(
                            line.replace("RPLME_nThreads", str(df.GetNSlots()))
                        )
                    for line in aliases[alias].get("linesToProcess", []):
                        # RPLME_nThreads is used to set the number of threads
                        # NOTE(review): executes configuration-provided code
                        exec(line, globals())
                    if "expr" in list(aliases[alias].keys()):
                        define_string += (
                            f".Define('{alias}', '{aliases[alias]['expr']}') \\\n\t"
                        )
                        #print(define_string)
                        usedVariables = usedVariables.union(
                            ParseCpp.listOfVariables(
                                ParseCpp.parse(aliases[alias]["expr"])
                            )
                        )
                    elif "exprSlot" in list(aliases[alias].keys()):
                        # slot-aware define: [0] is the expression, [1] the args
                        args = aliases[alias]["exprSlot"]
                        args[0] = args[0].replace("RPLME_nThreads", str(df.GetNSlots()))
                        define_string += (
                            f".DefineSlot('{alias}', '{args[0]}', [ {args[1]} ]) \\\n\t"
                        )
                        usedVariables = usedVariables.union(
                            ParseCpp.listOfVariables(
                                ParseCpp.parse(aliases[alias]["exprSlot"][1])
                            )
                        )
                    elif "class" in list(aliases[alias].keys()):
                        define_string += f".Define('{alias}', '{aliases[alias]['class']} ( {aliases[alias].get('args', '')} )') \\\n\t"
                # evaluate the accumulated .Define chain on the local ``df``
                df1 = eval(define_string)
                self.dfs[sampleName][index]["df"] = df1
                stringPrint = "before nuisances"
                if afterNuis:
                    stringPrint = "after nuisances"
                print(
                    f"\n\nLoaded all aliases {stringPrint} for {sampleName} index {index}\n\n"
                )
                self.dfs[sampleName][index]["usedVariables"] = list(usedVariables)
                self.dfs[sampleName][index]["columnNames"] = list(
                    map(lambda k: str(k), df1.GetColumnNames())
                )
    def loadAliasWeight(self):
        """
        Loads only the special alias ``weight`` in the dataframes.

        For MC samples the weight expression is the per-chunk weight scaled by
        the luminosity; for data it is the per-chunk weight alone. The column
        is added through an eval'd ``.Define`` chain on the local ``df``.
        """
        samples = self.samples
        aliases = self.aliases
        for sampleName in self.dfs.keys():
            for index in self.dfs[sampleName].keys():
                # referenced by the eval'd define_string below, hence the noqa
                df = self.dfs[sampleName][index]["df"]  # noqa: F841
                # Define the most important alias: the weight!
                # find the sample tuple matching this name and chunk index
                sample = list(
                    filter(lambda k: k[0] == sampleName and k[3] == index, samples)
                )[0]
                weight = sample[2]
                isData = sample[4]
                if not isData:
                    # MC: scale by the luminosity
                    aliases["weight"] = {"expr": str(self.lumi) + " * " + weight}
                else:
                    aliases["weight"] = {"expr": weight}
                print("\n\n", sampleName, "\n\n", aliases["weight"])
                # load the alias weight
                define_string = "df"
                alias = "weight"
                if alias in self.dfs[sampleName][index]["columnNames"]:
                    print(
                        f"Alias {alias} cannot be defined, column with that name already exists"
                    )
                    sys.exit()
                define_string += (
                    f".Define('{alias}', '{aliases[alias]['expr']}') \\\n\t"
                )
                df1 = eval(define_string)
                self.dfs[sampleName][index]["df"] = df1
                print(f"\n\nLoaded alias weight for {sampleName} index {index}\n\n")
                self.dfs[sampleName][index]["columnNames"].append("weight")
def loadSystematicsSuffix(self): | |
""" | |
Loads systematics of type ``suffix`` in the dataframes. | |
""" | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
df = self.dfs[sampleName][index]["df"] | |
columnNames = self.dfs[sampleName][index]["columnNames"] | |
nuisances = self.nuisances | |
# nuisance key is not used | |
for _, nuisance in list(nuisances.items()): | |
if sampleName not in nuisance.get("samples", {sampleName: []}): | |
continue | |
if nuisance.get("type", "") == "shape": | |
if nuisance.get("kind", "") == "suffix": | |
variation = nuisance["mapDown"] | |
separator = nuisance.get("separator", "_") | |
variedCols = list( | |
filter(lambda k: k.endswith(variation), columnNames) | |
) | |
if len(variedCols) == 0: | |
print(f"No varied columns for {variation}") | |
sys.exit() | |
# if 'JER' in nuisance['name']: | |
# print(variedCols) | |
baseCols = list( | |
map( | |
lambda k: k[ | |
RunAnalysis.index_sub(k, "Events.") | |
+ len("Events.") : -len(separator + variation) | |
] | |
if "Events" in k | |
else k[: -len(separator + variation)], | |
variedCols, | |
) | |
) | |
for baseCol in baseCols: | |
if ( | |
baseCol | |
not in self.dfs[sampleName][index]["usedVariables"] | |
): | |
# baseCol is never used -> useless to register variation | |
print("unused variable", baseCol) | |
continue | |
print(baseCol in [str(k) for k in df.GetColumnNames()]) | |
if not (baseCol in [str(k) for k in df.GetColumnNames()]): | |
continue | |
if "bool" not in str(df.GetColumnType(baseCol)).lower(): | |
varNameDown = ( | |
baseCol | |
+ separator | |
+ nuisance["mapDown"] | |
+ "*" | |
+ nuisance["samples"][sampleName][1] | |
) | |
varNameUp = ( | |
baseCol | |
+ separator | |
+ nuisance["mapUp"] | |
+ "*" | |
+ nuisance["samples"][sampleName][0] | |
) | |
else: | |
varNameDown = ( | |
baseCol + separator + nuisance["mapDown"] | |
) | |
varNameUp = baseCol + separator + nuisance["mapUp"] | |
_type = df.GetColumnType(baseCol) | |
expr = ( | |
ParseCpp.RVecExpression(_type) | |
+ "{" | |
+ f"{varNameDown}, {varNameUp}" | |
+ "}" | |
) | |
if "JER" in nuisance["name"]: | |
print("JER expr", expr) | |
df = df.Vary( | |
baseCol, | |
expr, | |
variationTags=["Down", "Up"], | |
variationName=nuisance["name"], | |
) | |
else: | |
continue | |
# else: | |
# print("Unsupported nuisance") | |
# sys.exit() | |
self.dfs[sampleName][index]["df"] = df | |
def loadSystematicsReweights(self): | |
""" | |
Loads systematics of type ``suffix`` in the dataframes. | |
""" | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
df = self.dfs[sampleName][index]["df"] | |
columnNames = self.dfs[sampleName][index]["columnNames"] | |
nuisances = self.nuisances | |
# nuisance key is not used | |
for _, nuisance in list(nuisances.items()): | |
if sampleName not in nuisance.get("samples", {sampleName: []}): | |
continue | |
if nuisance.get("type", "") == "shape": | |
if nuisance.get("kind", "") == "weight": | |
weights = nuisance["samples"].get(sampleName, None) | |
if weights is not None: | |
variedNames = [] | |
if weights[0] not in columnNames: | |
df = df.Define(nuisance["name"] + "_up", weights[0]) | |
variedNames.append(nuisance["name"] + "_up") | |
else: | |
variedNames.append(weights[0]) | |
if weights[1] not in columnNames: | |
df = df.Define( | |
nuisance["name"] + "_down", weights[1] | |
) | |
variedNames.append(nuisance["name"] + "_down") | |
else: | |
variedNames.append(weights[1]) | |
if df.GetColumnType("weight") == "double": | |
expr = ( | |
"ROOT::RVecD" | |
+ "{ weight * (double)" | |
+ f"{variedNames[1]}, weight * (double) {variedNames[0]}" | |
+ "}" | |
) | |
elif df.GetColumnType("weight") == "float": | |
expr = ( | |
"ROOT::RVecF" | |
+ "{ weight * (float)" | |
+ f"{variedNames[1]}, weight * (float) {variedNames[0]}" | |
+ "}" | |
) | |
else: | |
print( | |
"Weight column has unknown type:", | |
df.GetColumnType("weight"), | |
"while varied is: ", | |
df.GetColumnType(variedNames[1]), | |
) | |
sys.exit() | |
df = df.Vary( | |
"weight", | |
expr, | |
variationTags=["Down", "Up"], | |
variationName=nuisance["name"], | |
) | |
else: | |
continue | |
# else: | |
# print("Unsupported nuisance") | |
# sys.exit() | |
self.dfs[sampleName][index]["df"] = df | |
def loadSystematicsReweightsEnvelopeRMS(self): | |
""" | |
Loads systematics of type ``weight_envelope`` or type ``weight_rms`` in the dataframes. | |
""" | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
df = self.dfs[sampleName][index]["df"] | |
columnNames = self.dfs[sampleName][index]["columnNames"] | |
nuisances = self.nuisances | |
# nuisance key is not used | |
for _, nuisance in list(nuisances.items()): | |
if sampleName not in nuisance.get("samples", {sampleName: []}): | |
continue | |
if nuisance.get("type", "") == "shape": | |
if ( | |
nuisance.get("kind", "") == "weight_envelope" | |
or nuisance.get("kind", "") == "weight_rms" | |
or nuisance.get("kind", "") == "weight_square" | |
): | |
weights = nuisance["samples"].get(sampleName, None) | |
if weights is not None: | |
variedNames = [] | |
variedTags = [] | |
for i, weight in enumerate(weights): | |
if weight not in columnNames: | |
nuisName = f'{nuisance["name"]}_{i}' | |
df = df.Define(nuisName, weight) | |
variedNames.append(nuisName) | |
variedTags.append(str(i)) | |
else: | |
variedNames.append(weight) | |
variedTags.append(str(i)) | |
if df.GetColumnType("weight") == "double": | |
expr = ( | |
"ROOT::RVecD" | |
+ "{ " | |
+ ", ".join( | |
[ | |
f"weight * (double) {variedName}" | |
for variedName in variedNames | |
] | |
) | |
+ "}" | |
) | |
elif df.GetColumnType("weight") == "float": | |
expr = ( | |
"ROOT::RVecF" | |
+ "{ " | |
+ ", ".join( | |
[ | |
f"weight * (float) {variedName}" | |
for variedName in variedNames | |
] | |
) | |
+ "}" | |
) | |
else: | |
print( | |
"Weight column has unknown type:", | |
df.GetColumnType("weight"), | |
"while varied is: ", | |
df.GetColumnType(variedNames[0]), | |
) | |
sys.exit() | |
df = df.Vary( | |
"weight", | |
expr, | |
variationTags=variedTags, | |
variationName=nuisance["name"] | |
+ "_SPECIAL_NUIS_" | |
+ nuisance["kind"].split("_")[-1], | |
) | |
else: | |
continue | |
self.dfs[sampleName][index]["df"] = df | |
def loadVariables(self): | |
""" | |
Loads variables (not the ones with the 'tree' key in them), | |
and checks if they are already in the dataframe columns, | |
if so it adds ``__`` at the beginning of the name. | |
Since variables are shared but not the aliases, | |
it could happen that a variable's name or expression is already defined | |
for a given df but not for another one -> | |
need to determine a common and compatible set of variables for all the many dfs. | |
This is done by gathering the largest set of column names. | |
""" | |
bigColumnNames = set() | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
columnNames = self.dfs[sampleName][index]["columnNames"] | |
bigColumnNames = bigColumnNames.union(set(columnNames)) | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
for var in list(self.variables.keys()): | |
if "tree" in self.variables[var].keys(): | |
continue | |
if var in bigColumnNames and var != self.variables[var]["name"]: | |
# here I want to define a variable whose key is already in the column name list | |
# and its expression is different from its name | |
# therefore we will either create a Define or an Alias | |
# need to rename the new variable! | |
prefix = "__" | |
nVar = prefix + var | |
while nVar in bigColumnNames: | |
prefix += "__" | |
nVar = prefix + var | |
# print("changing variable", var, "to " + nVar) | |
self.remappedVariables[nVar] = prefix | |
self.variables[nVar] = deepcopy(self.variables[var]) | |
del self.variables[var] | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
df = self.dfs[sampleName][index]["df"] | |
for var in list(self.variables.keys()): | |
if "tree" in self.variables[var].keys(): | |
continue | |
for i, _var in enumerate(self.variables[var]["name"].split(":")): | |
n = var if i == 0 else var + f"_{i}" | |
if _var not in bigColumnNames: | |
# the variable expr does not exist, create it | |
df = df.Define(n, _var) | |
elif n not in bigColumnNames: | |
# the variable expr exists in the df, but not the variable key | |
# use alias | |
df = df.Alias(n, _var) | |
elif n == _var and n in bigColumnNames: | |
# since the variable name and expression are equal and are already present in the df don't do anything | |
pass | |
else: | |
# FIXME | |
print("Error, cannot define variable") | |
sys.exit() | |
self.dfs[sampleName][index]["df"] = df | |
self.dfs[sampleName][index]["columnNames"] = list( | |
map(lambda k: str(k), df.GetColumnNames()) | |
) | |
def loadBranches(self): | |
""" | |
Loads branches (the ones specified in an ``alias`` with the ``tree`` key in them), | |
and checks if they are already in the dataframe columns, | |
if so it adds ``__`` at the beginning of the name. | |
Since variables are shared but not the aliases, | |
it could happen that a variable's name or expression is already defined | |
for a given df but not for another one -> | |
need to determine a common and compatible set of variables for all the many dfs. | |
This is done by gathering the largest set of column names. | |
""" | |
bigColumnNames = set() | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
columnNames = self.dfs[sampleName][index]["columnNames"] | |
bigColumnNames = bigColumnNames.union(set(columnNames)) | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
for var in list(self.variables.keys()): | |
if "tree" not in self.variables[var].keys(): | |
continue | |
for branch in list(self.variables[var]["tree"].keys()): | |
if ( | |
branch in bigColumnNames | |
and branch != self.variables[var]["tree"][branch] | |
): | |
# here I want to define a variable whose key is already in the column name list | |
# and its expression is different from its name | |
# therefore we will either create a Define or an Alias | |
# need to rename the new variable! | |
prefix = "__" | |
nVar = prefix + branch | |
while nVar in bigColumnNames: | |
prefix += "__" | |
nVar = prefix + branch | |
# print("changing variable", branch, "to " + nVar) | |
self.remappedVariables[nVar] = prefix | |
self.variables[var]["tree"][nVar] = self.variables[var][ | |
"tree" | |
][branch] | |
del self.variables[var]["tree"][branch] | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
df = self.dfs[sampleName][index]["df"] | |
for var in list(self.variables.keys()): | |
if "tree" not in self.variables[var].keys(): | |
continue | |
for branch in self.variables[var]["tree"].keys(): | |
# print("working on defining branch", branch) | |
if self.variables[var]["tree"][branch] not in bigColumnNames: | |
# the variable expr does not exist, create it | |
# print("define the branch") | |
df = df.Define(branch, self.variables[var]["tree"][branch]) | |
elif branch not in bigColumnNames: | |
# the variable expr exists in the df, but not the variable key | |
# use alias | |
# print("define an alias to the branch") | |
df = df.Alias(branch, self.variables[var]["tree"][branch]) | |
elif ( | |
branch == self.variables[var]["tree"][branch] | |
and branch in bigColumnNames | |
): | |
# since the variable name and expression are equal and are already present in the df don't do anything | |
pass | |
else: | |
# FIXME | |
print("Error, cannot define variable") | |
sys.exit() | |
self.dfs[sampleName][index]["df"] = df | |
self.dfs[sampleName][index]["columnNames"] = list( | |
map(lambda k: str(k), df.GetColumnNames()) | |
) | |
def createResults(self): | |
""" | |
Create empty dictionary for results, will store all the different histos | |
""" | |
self.results = {} | |
for cut in self.cuts.keys(): | |
self.results[cut] = {} | |
for variable in self.variables.keys(): | |
self.results[cut][variable] = {} | |
def splitSubsamples(self): | |
""" | |
Split samples into subsamples if needed | |
After this method the ``dfs`` attribute will be modified to contain the subsamples names instead of the original sample name | |
""" | |
sampleNames = set( | |
list(map(lambda k: k[0], list(filter(lambda k: len(k) == 6, self.samples)))) | |
) | |
for sampleName in sampleNames: | |
_sample = list(filter(lambda k: k[0] == sampleName, self.samples))[0] | |
for subsample in list(_sample[5].keys()): | |
self.dfs[sampleName + "_" + subsample] = {} | |
for index in self.dfs[sampleName].keys(): | |
self.dfs[sampleName + "_" + subsample][index] = { | |
"parent": sampleName | |
} | |
subsampleCut = _sample[5][subsample] | |
subsampleWeight = '1.0' | |
if isinstance(subsampleCut, tuple) or isinstance(subsampleCut, list): | |
subsampleCut = _sample[5][subsample][0] | |
subsampleWeight = _sample[5][subsample][1] | |
self.dfs[sampleName + "_" + subsample][index]["df"] = self.dfs[ | |
sampleName | |
][index]["df"].Filter(subsampleCut).Redefine('weight', 'weight * ' + subsampleWeight) | |
self.dfs[sampleName + "_" + subsample][index][ | |
"columnNames" | |
] = self.dfs[sampleName][index]["columnNames"] | |
self.dfs[sampleName + "_" + subsample][index]["ttree"] = self.dfs[ | |
sampleName | |
][index]["ttree"] | |
del self.dfs[sampleName] | |
def create_cuts_vars(self): | |
""" | |
Defines ``Histo1D`` for each variable and cut and dataframe. It also creates dictionary for variations through ``VariationsFor()`` | |
""" | |
mergedCuts = self.cuts | |
variables = self.variables | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
df = self.dfs[sampleName][index]["df"] | |
for cut in mergedCuts.keys(): | |
df_cat = df.Filter(mergedCuts[cut]["expr"]) | |
opts = ROOT.RDF.RSnapshotOptions() | |
opts.fLazy = True | |
for var in list(variables.keys()): | |
if "tree" in variables[var].keys(): | |
if not mergedCuts[cut]["parent"] in variables[var]["cuts"]: | |
# del df_cat | |
continue | |
outpath = self.outputFileMap | |
if self.outputFileMap != "output.root": | |
outfileName = ( | |
".".join( | |
self.outputFileMap.split("/")[-1].split(".")[ | |
:-1 | |
] | |
) | |
+ f"__ALL__{cut}_{sampleName}_{str(index)}.root" | |
) | |
outpath = ( | |
"/".join(self.outputFileMap.split("/")[:-1]) | |
+ "/" | |
+ outfileName | |
) | |
else: | |
outpath = f"output_{cut}_{sampleName}_{str(index)}.root" | |
# _h = df_cat.Snapshot(cut + '/' + sampleName, outpath, list(self.variables[var]['tree'].keys()), opts) | |
# FIXME always save the weight! | |
""" | |
___cols = list(self.variables[var]["tree"].keys()) + ["weight"], | |
print(___cols) | |
print(type(df_cat)) | |
print(df_cat) | |
__cols = [str(k) for k in df_cat.GetColumnNames()] | |
#print(__cols) | |
print(set(___cols[0]).difference(__cols)) | |
""" | |
_h = df_cat.Snapshot( | |
"Events", | |
outpath, | |
list(self.variables[var]["tree"].keys()) + ["weight"], | |
opts, | |
) | |
else: | |
vs = variables[var]["name"].split(":") | |
histRange = [] | |
if len(variables[var]["range"]) == len(vs): | |
# bin endges are provided by the user | |
for ax in variables[var]["range"]: | |
histRange.extend([len(ax) - 1, array("d", ax)]) | |
else: | |
histRange = variables[var]["range"] | |
histRange = tuple(histRange) | |
if len(vs) == 1: | |
_h = df_cat.Histo1D( | |
(cut + "_" + var, "") + histRange, var, "weight" | |
) | |
elif len(vs) == 2: | |
varNames = [] | |
for i, _var in enumerate(vs): | |
n = var if i == 0 else var + f"_{i}" | |
varNames.append(n) | |
_h = df_cat.Histo2D( | |
(cut + "_" + var, "") + histRange, | |
*varNames, | |
"weight", | |
) | |
else: | |
print("Unknown dimension of histo for variable", var) | |
sys.exit() | |
if sampleName not in self.results[cut][var].keys(): | |
self.results[cut][var][sampleName] = {} | |
self.results[cut][var][sampleName][index] = _h | |
for cut in mergedCuts.keys(): | |
for var in list(variables.keys()): | |
if "tree" not in variables[var].keys(): | |
_s = self.results[cut][var][sampleName][index] | |
_s_var = ROOT.RDF.Experimental.VariationsFor(_s) | |
self.results[cut][var][sampleName][index] = _s_var | |
def convertResults(self): | |
""" | |
Gather resulting histograms and fold them if needed. | |
Systematics are also saved. | |
""" | |
mergedCuts = self.cuts | |
variables = self.variables | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
for cut in mergedCuts.keys(): | |
for var in list(variables.keys()): | |
if "tree" in variables[var].keys(): | |
# no need to process SnapShots | |
continue | |
_s_var = self.results[cut][var][sampleName][index] | |
_histos = {} | |
# _histos_special_nuis = {} | |
for _variation in list(map(lambda k: str(k), _s_var.GetKeys())): | |
_h_name = _variation.replace(":", "") | |
_h = 0 | |
_h = _s_var[_variation] | |
fold = variables[var].get("fold", 0) | |
if isinstance(_h, ROOT.TH1D): | |
if fold == 1 or fold == 3: | |
_from = 0 | |
_to = 1 | |
_fold(_h, _to, _from) | |
if fold == 2 or fold == 3: | |
lastBin = _h.GetNbinsX() | |
_from = lastBin + 1 | |
_to = lastBin | |
_fold(_h, _to, _from) | |
elif isinstance(_h, ROOT.TH2D): | |
nx = _h.GetNbinsX() | |
ny = _h.GetNbinsY() | |
if fold == 1 or fold == 3: | |
for i in range(0, ny+2): | |
_from = 0 | |
_to = 0 | |
if i == 0: | |
_from = _h.GetBin(0, i) | |
_to = _h.GetBin(1, i+1) | |
elif i == ny+1: | |
_from = _h.GetBin(0, i) | |
_to = _h.GetBin(1, i-1) | |
else: | |
_from = _h.GetBin(0, i) | |
_to = _h.GetBin(1, i) | |
_fold(_h, _to, _from) | |
for i in range(0, nx+2): | |
_from = 0 | |
_to = 0 | |
if i == 0: | |
_from = _h.GetBin(i, 0) | |
_to = _h.GetBin(i+1, 1) | |
elif i == ny+1: | |
_from = _h.GetBin(i, 0) | |
_to = _h.GetBin(i-1, 1) | |
else: | |
_from = _h.GetBin(i, 0) | |
_to = _h.GetBin(i, 1) | |
_fold(_h, _to, _from) | |
if fold == 2 or fold == 3: | |
for i in range(0, ny+2): | |
_from = 0 | |
_to = 0 | |
if i == 0: | |
_from = _h.GetBin(nx+1, i) | |
_to = _h.GetBin(nx , i+1) | |
elif i == ny+1: | |
_from = _h.GetBin(nx+1, i) | |
_to = _h.GetBin(nx , i-1) | |
else: | |
_from = _h.GetBin(nx+1, i) | |
_to = _h.GetBin(nx , i) | |
_fold(_h, _to, _from) | |
for i in range(0, nx+2): | |
_from = 0 | |
_to = 0 | |
if i == 0: | |
_from = _h.GetBin(i, nx+1) | |
_to = _h.GetBin(i+1, nx) | |
elif i == ny+1: | |
_from = _h.GetBin(i, nx+1) | |
_to = _h.GetBin(i-1, nx) | |
else: | |
_from = _h.GetBin(i, nx+1) | |
_to = _h.GetBin(i, nx) | |
_fold(_h, _to, _from) | |
#unroll 2d | |
_name = _h.GetName() | |
_h.SetName(_name + "_old") | |
new_name = _name + "_" + _h_name + "_" + sampleName + "_" + str(index) | |
# print('creating histo with name', new_name) | |
_h2 = ROOT.TH1D(new_name, _h.GetName(), nx*ny, 1, nx*ny) | |
for i in range(1, nx+1): | |
for j in range(1, ny+1): | |
new_ind = (i-1) * ny + j | |
old_ind = _h.GetBin(i,j) | |
_h2.SetBinContent(new_ind, _h.GetBinContent(old_ind)) | |
_h2.SetBinError(new_ind, _h.GetBinError(old_ind)) | |
del _h | |
_h = _h2 | |
# if 'SPECIAL_NUIS' in _h_name: | |
# nuisName = _variation.split(':')[0] | |
# if nuisName in _histos_special_nuis.keys(): | |
# _histos_special_nuis[nuisName].append(_h.Clone()) | |
# else: | |
# _histos_special_nuis[nuisName] = [_h.Clone()] | |
# continue | |
_histos[_h_name] = _h#.Clone() | |
# del _h | |
# del self.results[cut][var][sampleName]['object'] | |
# replace the object with the dictionary of histos | |
# for nuisName in _histos_special_nuis.keys(): | |
# # print(nuisName) | |
# hName = nuisName.split('_SPECIAL_NUIS')[0] | |
# h_up = _histos_special_nuis[nuisName][0].Clone() | |
# h_do = _histos_special_nuis[nuisName][0].Clone() | |
# variations = np.empty((len(_histos_special_nuis[nuisName]), _histos_special_nuis[nuisName][0].GetNbinsX()+2), dtype=float) | |
# for i in range(len(_histos_special_nuis[nuisName])): | |
# variations[i, :] = hist2array(_histos_special_nuis[nuisName][i], overflow=True) | |
# arrup = 0 | |
# arrdo = 0 | |
# if nuisName.endswith('envelope'): | |
# arrup = np.max(variations, axis=0) | |
# arrdo = np.min(variations, axis=0) | |
# elif nuisName.endswith('rms'): | |
# vnominal = hist2array(_histos[hName], overflow=True) | |
# arrnom = np.tile(vnominal,(variations.shape[0], 1) ) | |
# arrv = np.sqrt(np.mean(np.square(variations - arrnom), axis=0)) | |
# arrup = vnominal + arrv | |
# arrdo = vnominal - arrv | |
# else: | |
# continue | |
# for i in range(0, h_up.GetNbinsX() +2 ): | |
# h_up.SetBinContent(i, arrup[i]) | |
# h_do.SetBinContent(i, arrdo[i]) | |
# _histos[hName + 'Up'] = h_up.Clone() | |
# _histos[hName + 'Down'] = h_do.Clone() | |
self.results[cut][var][sampleName][index] = _histos | |
def saveResults(self): | |
""" | |
Save results in a root file. | |
If ``Snapshot`` were created will merge them in a output file. | |
""" | |
f = ROOT.TFile(self.outputFileMap, "recreate") | |
toBeDeleted = [] | |
openedFiles = [] | |
openedTrees = [] | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
for cut in self.cuts.keys(): | |
for var in list(self.variables.keys()): | |
if "tree" in self.variables[var].keys(): | |
if ( | |
not self.cuts[cut]["parent"] | |
in self.variables[var]["cuts"] | |
): | |
continue | |
if self.outputFileMap != "output.root": | |
outfileName = ( | |
".".join( | |
self.outputFileMap.split("/")[-1].split(".")[ | |
:-1 | |
] | |
) | |
+ f"__ALL__{cut}_{sampleName}_{str(index)}.root" | |
) | |
outpath = ( | |
"/".join(self.outputFileMap.split("/")[:-1]) | |
+ "/" | |
+ outfileName | |
) | |
else: | |
outpath = f"output_{cut}_{sampleName}_{str(index)}.root" | |
_f = ROOT.TFile(outpath) | |
openedFiles.append(_f) | |
if _f.GetListOfKeys().Contains("Events"): | |
t = _f.Get("Events") | |
openedTrees.append(t) | |
f.cd("/") | |
folder = f"trees/{cut}/{sampleName}/" | |
# print("Creating dir", folder, outpath) | |
toBeDeleted.append(outpath) | |
ROOT.gDirectory.mkdir(folder, "", True) | |
ROOT.gDirectory.cd("/" + folder) | |
t.CloneTree().Write() | |
else: | |
print( | |
"No TTree was created for", | |
cut, | |
var, | |
sampleName, | |
index, | |
"even if requested", | |
file=sys.stderr, | |
) | |
f.cd("/") | |
for cut_cat in list(self.results.keys()): | |
_cut_cat = f.mkdir(cut_cat) | |
for var in list(self.results[cut_cat].keys()): | |
if "tree" in self.variables[var].keys(): | |
continue | |
_var = "" | |
if var in list(self.remappedVariables.keys()): | |
# remove the __ at the beginning | |
_var = var[len(self.remappedVariables[var]) :] | |
else: | |
_var = var | |
_cut_cat.mkdir(_var) | |
f.cd("/" + cut_cat + "/" + _var) | |
for sampleName in list(self.results[cut_cat][var].keys()): | |
# should first merge histos | |
mergedHistos = {} | |
for index in list(self.results[cut_cat][var][sampleName].keys()): | |
for hname in list( | |
self.results[cut_cat][var][sampleName][index].keys() | |
): | |
if hname not in mergedHistos.keys(): | |
mergedHistos[hname] = self.results[cut_cat][var][ | |
sampleName | |
][index][hname].Clone() | |
else: | |
mergedHistos[hname].Add( | |
self.results[cut_cat][var][sampleName][index][hname] | |
) | |
for hname in mergedHistos.keys(): | |
if hname == "nominal": | |
_string = "histo_" + sampleName | |
mergedHistos[hname].SetName(_string) | |
mergedHistos[hname].SetTitle(_string) | |
else: | |
_string = "histo_" + sampleName + "_" + hname | |
mergedHistos[hname].SetName(_string) | |
mergedHistos[hname].SetTitle(_string) | |
mergedHistos[hname].Write() | |
f.Close() | |
# clean snapshot files | |
proc = "" | |
for _file in toBeDeleted: | |
proc += f" rm {_file}; " | |
import subprocess | |
_proc = subprocess.Popen(proc, shell=True) | |
_proc.wait() | |
# print(proc) | |
def mergeSaveResults(self): | |
f = ROOT.TFile(self.outputFileMap, "recreate") | |
for cut_cat in list(self.results.keys()): | |
_cut_cat = f.mkdir(cut_cat) | |
for var in list(self.results[cut_cat].keys()): | |
if "tree" in self.variables[var].keys(): | |
# no need to process SnapShots | |
continue | |
_cut_cat.mkdir(var) | |
f.cd("/" + cut_cat + "/" + var) | |
for sampleName in list(self.results[cut_cat][var].keys()): | |
# should first merge histos | |
mergedHistos = {} | |
for index in list(self.results[cut_cat][var][sampleName].keys()): | |
for hname in list( | |
self.results[cut_cat][var][sampleName][index].keys() | |
): | |
if hname not in mergedHistos.keys(): | |
mergedHistos[hname] = self.results[cut_cat][var][ | |
sampleName | |
][index][hname].Clone() | |
else: | |
mergedHistos[hname].Add( | |
self.results[cut_cat][var][sampleName][index][hname] | |
) | |
for hname in mergedHistos.keys(): | |
if hname == "nominal": | |
mergedHistos[hname].SetName("histo_" + sampleName) | |
else: | |
mergedHistos[hname].SetName( | |
"histo_" + sampleName + "_" + hname | |
) | |
mergedHistos[hname].Write() | |
f.Close() | |
def mergeAndSaveResults(self): | |
f = ROOT.TFile(self.outputFileMap, "recreate") | |
for cut_cat in list(self.results.keys()): | |
_cut_cat = f.mkdir(cut_cat) | |
for var in list(self.results[cut_cat].keys()): | |
if "tree" in self.variables[var].keys(): | |
# no need to process SnapShots | |
continue | |
_cut_cat.mkdir(var) | |
f.cd("/" + cut_cat + "/" + var) | |
for sampleName in list(self.results[cut_cat][var].keys()): | |
# should first merge histos | |
mergedHistos = {} | |
for index in list(self.results[cut_cat][var][sampleName].keys()): | |
for hname in list( | |
self.results[cut_cat][var][sampleName][index].keys() | |
): | |
if hname not in mergedHistos.keys(): | |
mergedHistos[hname] = self.results[cut_cat][var][ | |
sampleName | |
][index][hname].Clone() | |
else: | |
mergedHistos[hname].Add( | |
self.results[cut_cat][var][sampleName][index][hname] | |
) | |
for hname in mergedHistos.keys(): | |
if hname == "nominal": | |
mergedHistos[hname].SetName("histo_" + sampleName) | |
else: | |
mergedHistos[hname].SetName( | |
"histo_" + sampleName + "_" + hname | |
) | |
mergedHistos[hname].Write() | |
f.Close() | |
def run(self): | |
""" | |
Runs the analysis: | |
1. load the aliases without the ``afterNuis`` option | |
2. load the ``suffix`` systematics | |
3. load the alias ``weight`` | |
4. load the reweight systematics (they need the ``weight`` to be defined) | |
5. finally load the suffix systematics with the ``afterNuis`` option | |
After this important procedure it filters with ``preselection`` the many ``dfs``, loads systematics | |
loads ``variables``, creates the results dict, splits the samples, creates the cuts/var histos, runs the dataframes | |
and saves results. | |
""" | |
# load all aliases needed before nuisances of type suffix | |
self.loadAliases() | |
self.loadSystematicsSuffix() | |
# load alias weight needed before nuisances of type weight | |
self.loadAliasWeight() | |
self.loadSystematicsReweights() | |
self.loadSystematicsReweightsEnvelopeRMS() | |
# load all aliases remaining | |
self.loadAliases(True) | |
# apply preselections | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
self.dfs[sampleName][index]["df"] = self.dfs[sampleName][index][ | |
"df" | |
# ].Filter("(" + self.preselections + ") && abs(weight) > 0.0") | |
].Filter("(" + self.preselections + ")") | |
self.loadVariables() | |
self.loadBranches() | |
print("loaded all variables") | |
self.createResults() | |
print("created empty results dict") | |
#print(self.cuts) | |
#print(self.results) | |
self.splitSubsamples() | |
print("splitted samples") | |
self.create_cuts_vars() | |
print("created cuts") | |
""" | |
# FIXME RunGraphs can't handle results of VaraitionsFor, ask Vincenzo about it | |
# collect all the dataframes and run them | |
dfs = [] | |
for cut in self.cuts.keys(): | |
for var in self.variables.keys(): | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
# dfs.append(self.results[cut][var][sampleName][index]) | |
dfs.extend( | |
list(self.results[cut][var][sampleName][index].values())) | |
ROOT.RDF.RunGraphs(dfs) | |
""" | |
counts = [] | |
# register number of events in each df | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
counts.append(self.dfs[sampleName][index]["df"].Count()) | |
snapshots = [] | |
for cut in self.cuts.keys(): | |
for var in self.variables.keys(): | |
if ( | |
len(self.results[cut].get(var, [])) == 0 | |
or "tree" not in self.variables[var].keys() | |
): | |
# no snapshots for this combination of cut variable | |
continue | |
for sampleName in self.dfs.keys(): | |
for index in self.dfs[sampleName].keys(): | |
# dfs.append(self.results[cut][var][sampleName][index]) | |
snapshots.append(self.results[cut][var][sampleName][index]) | |
if len(snapshots) != 0: | |
ROOT.RDF.RunGraphs(snapshots) | |
for count in counts: | |
print("Number of events passing preselections:", count.GetValue()) | |
#print(self.results) | |
self.convertResults() | |
self.saveResults() | |
if __name__ == "__main__": | |
ROOT.gInterpreter.Declare('#include "headers.hh"') | |
exec(open("script.py").read()) | |
runner = RunAnalysis(samples, aliases, variables, cuts, nuisances, lumi) | |
runner.run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment