AlexElykov/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Running the script

Exactly one of -s or -f is required (the two options are mutually exclusive)
py .\hv_plotting.py -s 2
py .\hv_plotting.py -f "test1.csv" "test2.csv"
py .\hv_plotting.py -p "\path\to\data" -f "test.csv" "test2.csv"
Installing new software using pip on Windows

py -m pip install jupyter
py -m notebook

  
## hv_plotting.py
import os
import argparse
import datetime
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

class DataHandler(object):
    """ Locate test .csv files below a data directory and load them.

    File records are handed around as ``(file_date, file_path, file_name)``
    tuples, where ``file_date`` is a ``datetime.datetime`` taken from
    ``os.path.getctime``, ``file_path`` is a ``pathlib.Path`` and
    ``file_name`` is the bare file name.
    """

    # Directory that is searched when no explicit data path is given.
    DEFAULT_DATA_PATH = "../data/sc_storage_test/"

    def __init__(self, data_path=None) -> None:
        """ Remember the data directory and make sure that it exists.

        Args:
            data_path: directory to search for .csv files; any falsy value
                (``None``, ``""``) selects ``DEFAULT_DATA_PATH``.

        Raises:
            FileNotFoundError: if the directory does not exist.
        """
        self.data_path = data_path if data_path else self.DEFAULT_DATA_PATH

        if not os.path.isdir(self.data_path):
            raise FileNotFoundError(f"Directory {self.data_path} doesn't exist")

    def get_chosen_files(self, files) -> list:
        """ Get the records of the user selected .csv files.

        Args:
            files: iterable of file names (not paths) to look up.

        Returns:
            The matching ``(file_date, file_path, file_name)`` records in the
            order the names were requested. A name that exists in several
            sub-directories yields every match (oldest first); a name that
            is not found contributes nothing.
        """
        # Index the available files by name once instead of rescanning the
        # whole list for every requested file.
        by_name = {}
        for info in self.find_data_files():
            by_name.setdefault(info[2], []).append(info)

        res = []
        for name in files:
            res.extend(by_name.get(name, []))
        return res

    def find_data_files(self) -> list:
        """ Find all .csv files below the data directory (recursively).

        Returns:
            A list of ``(file_date, file_path, file_name)`` tuples sorted by
            ``file_date`` (oldest first); an empty list when the directory
            holds no .csv files.
        """
        file_info = []
        for path in Path(self.data_path).rglob("*.csv"):
            # NOTE: getctime is the creation time on Windows, but the time of
            # the last metadata change on Unix-like systems.
            c_time = os.path.getctime(path)
            file_date = datetime.datetime.fromtimestamp(c_time)
            file_info.append((file_date, path, path.name))

        # Sort once, after everything is collected, and on the date only so
        # that records with equal dates keep their discovery order.
        return sorted(file_info, key=lambda info: info[0])

    @staticmethod
    def raw_to_df(file_name, file_date) -> pd.DataFrame:
        """ Convert a raw .csv data file to a useful dataframe.

        Args:
            file_name: path of the file to read.
            file_date: ``datetime`` whose calendar date is combined with the
                ``HH:MM:SS`` time stamp of every row.

        Returns:
            A dataframe with the eight raw columns, where "time [s]" holds
            full datetimes, plus a "dt [s]" column with the seconds elapsed
            since the first row.
        """
        data_types = {"time [s]": str,
                      "set_voltage [v]": np.float64,
                      "voltage [v]": np.float64,
                      "ramp [v/s]": np.float64,
                      "current [mA]": np.float64,
                      "current_cap": str,
                      "warn": str,
                      "error": str
                      }

        # Read the data and skip the first five, usually poorly behaved, rows.
        # The multi-character delimiter (a tab followed by ';') needs the
        # python parsing engine.
        df = pd.read_csv(file_name,
                         delimiter='\t;',
                         names=list(data_types.keys()),
                         dtype=data_types,
                         usecols=list(range(len(data_types))),
                         skiprows=list(range(5)),
                         engine="python"
                         )

        # NOTE: every row receives the date of file_date, so a log running
        # past midnight produces time stamps that jump back by one day.
        timestamps = [
            datetime.datetime.combine(
                file_date, datetime.datetime.strptime(t, '%H:%M:%S').time())
            for t in df['time [s]']
        ]
        df["time [s]"] = timestamps

        if timestamps:
            start_time = timestamps[0]
            df["dt [s]"] = [(t - start_time).total_seconds() for t in timestamps]
        else:
            # No data rows: keep the column, but there is no start time.
            df["dt [s]"] = pd.Series(dtype=np.float64)

        return df

class MakePlot(object):
    """ Collection of simple plotting helpers; extend it as required. """

    @staticmethod
    def _elapsed_seconds(df) -> list:
        """ Seconds between each "time [s]" entry and the first entry """
        first = df["time [s]"].values[0]
        return [(stamp - first).total_seconds() for stamp in df["time [s]"]]

    def plot_t_v(self, df, ax, **args) -> None:
        """ Draw the voltage of a ramp against the elapsed time.

        The "voltage [v]" column is negated and divided by 1000, matching
        the kV axis label. Extra keyword arguments go straight to ax.plot.
        """
        elapsed = self._elapsed_seconds(df)
        kilovolts = [-volts/1000 for volts in df["voltage [v]"]]
        ax.plot(elapsed, kilovolts, **args)

        ax.set_xlabel("Time [s]")
        ax.set_ylabel("Voltage [kV]")

    def plot_t_i(self, df, ax, **args) -> None:
        """ Draw the current of a ramp against the elapsed time (log y-axis).

        The "current [mA]" column is negated and multiplied by 1000, matching
        the uA axis label. Extra keyword arguments go straight to ax.plot.
        """
        elapsed = self._elapsed_seconds(df)
        microamps = [-milliamps*1000 for milliamps in df["current [mA]"]]
        ax.plot(elapsed, microamps, **args)

        ax.set_xlabel("Time [s]")
        ax.set_ylabel("Current [uA]")
        ax.set_yscale("log")


def _positive_int(value):
    """ argparse type converter accepting only integers >= 1 """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"{value!r} is not an integer") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"{value!r} is not a positive integer")
    return number


def parse_args(argv=None):
    """ Input arguments for the main function.

    Args:
        argv: optional list of argument strings; when None (the default)
            argparse reads the arguments from sys.argv.

    Returns:
        The parsed argparse.Namespace with the attributes ``path``
        (str or None), ``files`` (list of str or None) and ``selection``
        (positive int or None). Exactly one of ``files`` and ``selection``
        is set, because the two options form a required, mutually exclusive
        group.
    """
    parser = argparse.ArgumentParser(prog="SC Data Plotter",
                                     description="Help for data plotter ... ")
    parser.add_argument("--path", "-p", help="path to the overarching data directory",
                        default=None,
                        type=str)

    group = parser.add_mutually_exclusive_group(required=True)
    # "+" instead of "*": a bare -f without any file name is rejected here,
    # rather than ending in an empty file list further down the line.
    group.add_argument("--files", "-f", help="list of files to plot", nargs="+")
    # Zero or negative counts make no sense for "the last N files".
    group.add_argument("--selection", "-s", help="select last N files to plot",
                       type=_positive_int)

    return parser.parse_args(argv)


def main():
    """ Main function selecting what to do based on user input.

    Either the files named with --files or the newest N files (--selection)
    are loaded and drawn into one figure with a voltage panel (left) and a
    current panel (right).

    Returns:
        The parsed command line arguments.

    Raises:
        ValueError: if the selection count is not positive, or if no data
            file matches the request.
    """
    args = parse_args()

    # An empty path falls back to the default directory of DataHandler
    data_handler = DataHandler(data_path=args.path or None)
    plotter = MakePlot()

    # selected_file_info[N]; 0 : file_date, 1 : file_path, 2 : file_name
    selected_file_info = []

    if args.files:
        # Use a list of file names to load their data
        print(f"Getting the files {args.files}")
        selected_file_info = data_handler.get_chosen_files(list(args.files))

        # Requested names that were not found would otherwise vanish silently
        found_names = {info[2] for info in selected_file_info}
        missing = [name for name in args.files if name not in found_names]
        if missing:
            print(f"Warning: no data files found for {missing}")
    elif args.selection is not None:
        # Load the data from the last N files. A count below one would slice
        # from the wrong end of the list, so it is rejected up front.
        if args.selection < 1:
            raise ValueError("--selection must be a positive integer")
        print(f"Plotting last {args.selection} files")
        selected_file_info = data_handler.find_data_files()[-args.selection:]

    # Check before any figure is created, so no empty window is left behind
    if not selected_file_info:
        raise ValueError("Something went wrong the data list is empty ...")

    # Plotting the data we've got above
    plt.figure(figsize=(14, 7), facecolor="w")
    ax = plt.subplot(121)
    ax1 = plt.subplot(122)

    for file_date, file_path, file_name in selected_file_info:
        # get the dataframe and plot the data
        print(f"Plotting: {file_name}")
        df = data_handler.raw_to_df(file_path, file_date)
        plotter.plot_t_v(df, ax, label=file_name)
        plotter.plot_t_i(df, ax1, label=file_name)

    # Both panels carry labelled curves, so both get a legend
    ax.legend()
    ax1.legend()
    plt.show()
    return args

if __name__ == "__main__":
    # Script entry point: run the whole command line workflow
    main()
	import os
	import argparse
	import datetime
	import numpy as np
	import pandas as pd
	from pathlib import Path
	import matplotlib.pyplot as plt

	class DataHandler(object):
	""" Class to do some simple data access handling """

	def __init__(self, data_path=None) -> None:
	if not data_path:
	self.data_path = "../data/sc_storage_test/"
	else:
	self.data_path = data_path

	if os.path.isdir(self.data_path):
	pass
	else:
	raise FileNotFoundError(f"Directory {self.data_path} doesn't exist")

	def get_chosen_files(self, files) -> list:
	""" Get the user selected .csv files """

	# Couldn't think of a nicer way to do this for lists of different length
	res = []
	avail_files = self.find_data_files()
	for file in files:
	for j in avail_files:
	# Compare the names of the files
	if file == j[2]:
	res.append(j)
	return res

	def find_data_files(self) -> list:
	""" Find all test .csv files in each data in the directory path """
	test_doc_paths = []
	test_doc_names = []
	test_dates = []

	# find .csv files in all available dirs and get their creation times
	for path in Path(self.data_path).rglob("*.csv"):
	test_doc_names.append(path.name)
	test_doc_paths.append(path)
	c_time = os.path.getctime(path)
	test_dates.append(datetime.datetime.fromtimestamp(c_time))
	temp_list = list(zip(test_dates, test_doc_paths, test_doc_names))
	# sort by the creation! date of the file
	res_list = sorted(temp_list, key = lambda x: x[0])

	return res_list

	@staticmethod
	def raw_to_df(file_name, file_date) -> pd.DataFrame:
	""" Convert raw .csv data file to a useful dataframe """

	data_types = {"time [s]": str,
	"set_voltage [v]": np.float64,
	"voltage [v]": np.float64,
	"ramp [v/s]": np.float64,
	"current [mA]": np.float64,
	"current_cap": str,
	"warn": str,
	"error": str
	}

	# read data and skip some usually poorly behaved rows
	df = pd.read_csv(file_name,
	delimiter='\t;',
	names=list(data_types.keys()),
	dtype=data_types,
	usecols=[*range(8)],
	skiprows=list(range(5)),
	engine="python"
	)

	df["time [s]"] = [datetime.datetime.combine((file_date),
	datetime.datetime.strptime(t, '%H:%M:%S').time()) for t in df['time [s]']]
	times = df['time [s]']
	start_time = df['time [s]'].values[0]
	df["dt [s]"] = [(t - start_time).total_seconds() for t in times]

	return df

	class MakePlot(object):
	""" Generic plotting functions - add more if you need them. """

	def plot_t_v(self, df, ax, **args) -> None:
	""" Plot voltage vs. time in a ramp """

	t_start = df["time [s]"].values[0]
	times = df["time [s]"]
	ln = ax.plot([(t - t_start).total_seconds() for t in times],
	[-i/1000 for i in df["voltage [v]"]], **args)
	ax.set_xlabel("Time [s]")
	ax.set_ylabel("Voltage [kV]")


	def plot_t_i(self, df, ax, **args) -> None:
	""" Plot current vs. time in a ramp """
	t_start = df["time [s]"].values[0]
	times = df["time [s]"]

	ln = ax.plot([(t - t_start).total_seconds() for t in times],
	[-i1000 for i in df["current [mA]"]], *args)
	ax.set_xlabel("Time [s]")
	ax.set_ylabel("Current [uA]")
	ax.set_yscale("log")


	def parse_args():
	""" Input arguments for the main function """
	parser = argparse.ArgumentParser(prog="SC Data Plotter",
	description="Help for data plotter ... ")
	group = parser.add_mutually_exclusive_group(required=True)
	parser.add_argument("--path", "-p", help="path to the overarching data directory",
	default=None,
	type=str)
	group.add_argument("--files", "-f", help="list of files to plot", nargs="*")
	group.add_argument("--selection", "-s", help="select last N files to plot",
	type=int)

	return parser.parse_args()


	def main():
	""" Main function selecting what to do based on user input """
	args = parse_args()

	# Get new data path for .csv files
	if args.path:
	sc_files_path = args.path
	else:
	sc_files_path = None

	# Init the classes defined above
	DH = DataHandler(data_path=sc_files_path)
	MP = MakePlot()

	# Use a list of file names to load their data
	selected_file_info = []
	file_list = []

	if args.files:
	print(f"Getting the files {args.files}")
	for file_ in args.files:
	file_list.append(file_)
	selected_file_info = DH.get_chosen_files(file_list)

	# Load the data from last N files
	if args.selection:
	print(f"Plotting last {args.selection} files")
	n_select = args.selection
	file_info = DH.find_data_files()
	selected_file_info = file_info[-n_select:]

	# Plotting the data we've got above
	plt.figure(figsize=(14, 7), facecolor="w")
	ax = plt.subplot(121)
	ax1 = plt.subplot(122)

	# selected_file_info[N]; 0 : file_date, 1 : file_path, 2 : file_name
	if selected_file_info:
	for f in selected_file_info:
	# get the dataframe and plot the data

	print(f"Plotting: {f[2]}")
	df = DH.raw_to_df(f[1], f[0])
	MP.plot_t_v(df, ax, label=f[2])
	MP.plot_t_i(df, ax1, label=f[2])
	ax.legend()
	plt.show()
	else:
	raise ValueError(f"Something went wrong the data list is empty ...")
	return args

	if __name__ == "__main__":
	""" Running the entire thing """
	main()