# mikelgg93/filter_Core_pupil_positions.py

## filter_Core_pupil_positions.py
import os
import tkinter as tk
from tkinter import filedialog
import logging
import argparse
import pandas as pd
import numpy as np

# Logging setup: records at INFO level and above are formatted with a
# timestamp, the logger name and the severity in front of the message.
# NOTE: the logger returned by getLogger() is not kept; the functions in this
# file log through the module-level logging.* helpers instead.
logging.getLogger("remove_confidence_below_threshold")
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)


def get_path():
    """Ask the user for the Pupil Player export folder through a directory dialog.

    The chosen folder must contain ``blinks.csv``, ``gaze_positions.csv`` and
    ``pupil_positions.csv``.

    Returns:
        The path of the selected directory.

    Raises:
        SystemExit: If the dialog is dismissed without a selection, or if the
            selected folder lacks any of the required files.
    """
    required_files = ("blinks.csv", "gaze_positions.csv", "pupil_positions.csv")

    # A hidden root window hosts the dialog; destroy it afterwards so the Tk
    # interpreter is not left alive for the rest of the run.
    root = tk.Tk()
    root.withdraw()
    try:
        # NOTE: a macOS-only ``message`` option was sketched here before but
        # was never enabled; only the title is passed.
        path = filedialog.askdirectory(title="Select the export directory from Player")
    finally:
        root.destroy()

    # An empty result means the dialog was cancelled. Joining an empty path
    # with the file names would validate the current working directory instead.
    if not path:
        error = "No folder was selected."
        logging.error(error)
        raise SystemExit(error)

    # Check that the folder contains the required files.
    if not all(os.path.exists(os.path.join(path, name)) for name in required_files):
        error = "The selected folder does not contain pupil, blinks and gaze positions data. "
        logging.error(error)
        raise SystemExit(error)
    return path


def _remove_blinks(df, blinks_df):
    """Return *df* without the samples that lie strictly inside any blink interval."""
    timestamps = df["pupil_timestamp"].to_numpy()
    # Accumulate one mask over all blinks so the frame is filtered only once.
    in_blink = np.zeros(len(df), dtype=bool)
    for start, end in zip(blinks_df["start_timestamp"], blinks_df["end_timestamp"]):
        in_blink |= (timestamps > start) & (timestamps < end)
    return df[~in_blink]


def _log_blink_settings(export_dir, blink_count):
    """Log the settings found in blink_detection_report.csv, when that file exists."""
    report_path = os.path.join(export_dir, "blink_detection_report.csv")
    if not os.path.exists(report_path):
        # The report only feeds the log message, so its absence is not fatal.
        logging.warning(
            "blink_detection_report.csv not found, the blink detection "
            "thresholds cannot be reported"
        )
        logging.info("Removing %s blinks", blink_count)
        return

    report = pd.read_csv(report_path)
    settings = dict(zip(report["key"], report["value"]))
    logging.info(
        "Removing %s blinks, these blinks were detected with onset confidence "
        "threshold of %s and offset confidence threshold of %s, if you feel "
        "some blinks are missing, try tuning the thresholds in Player",
        settings.get("blinks_exported", blink_count),
        settings.get("onset_confidence_threshold", "unknown"),
        settings.get("offset_confidence_threshold", "unknown"),
    )


def main():
    """Filter the pupil data of a Pupil Player export and save a cleaned copy.

    Command-line arguments:
        --input_path: Export folder. When it is omitted or does not exist, a
            directory dialog is opened through ``get_path``.
        --confidence_threshold: Samples are kept only when both ``confidence``
            and ``model_confidence`` are strictly above this value
            (default 0.6).

    Steps: drop the samples whose timestamp lies strictly inside a blink from
    ``blinks.csv``, drop the samples whose method is ``2d c++``, drop the
    low-confidence samples, keep a fixed subset of columns, sort by eye and
    timestamp, and write ``pupil_positions_cleaned.csv`` into the export
    folder.
    """
    # Parse input arguments
    parser = argparse.ArgumentParser(
        description="Filter data from Pupil Player export for pupillometry"
    )
    parser.add_argument("--input_path", default=None, type=str)
    parser.add_argument(
        "--confidence_threshold",
        default=0.6,
        type=float,
        help="keep samples whose confidence and model confidence exceed this value",
    )
    args = parser.parse_args()

    # If no usable path is provided, open a UI to select the folder
    if args.input_path is None or not os.path.exists(args.input_path):
        args.input_path = get_path()

    # Only the pupil and blink tables are needed for the filtering below.
    pupil_df = pd.read_csv(os.path.join(args.input_path, "pupil_positions.csv"))
    blinks_df = pd.read_csv(os.path.join(args.input_path, "blinks.csv"))

    # Remove blinks from pupil_positions, using the timestamps from blinks.csv
    _log_blink_settings(args.input_path, len(blinks_df))
    df = _remove_blinks(pupil_df, blinks_df)

    # Remove method == 2d c++
    logging.info("Removing 2d c++ method from the data")
    df = df[df["method"] != "2d c++"]

    # Remove samples whose confidence or model confidence is not above the threshold
    threshold = args.confidence_threshold
    logging.info(
        "Removing pupil positions with confidence or model confidence not above %s",
        threshold,
    )
    df = df[(df["confidence"] > threshold) & (df["model_confidence"] > threshold)]

    # Keep only the columns listed here; comment this section out to keep all columns
    logging.info("Cleaning up some columns")
    useful_columns = [
        "pupil_timestamp",
        "world_index",
        "eye_id",
        "diameter_3d",
        "diameter",
        "confidence",
        "method",
        "model_id",
        "model_confidence",
    ]
    df = df[useful_columns]
    logging.info("Summary \n %s", df.describe().transpose())

    # Order the samples by eye, then chronologically within each eye
    logging.info("Sorting by eye_id")
    df = df.sort_values(by=["eye_id", "pupil_timestamp"])

    # Report mean, max and min of confidence and 3d diameter for each eye
    logging.info("Printing the mean confidence and mean diameter for each eye")
    logging.info(
        "Eye mean confidence and diameter in mm: \n %s",
        df.groupby("eye_id")[["confidence", "diameter_3d"]].agg(["mean", "max", "min"]),
    )

    # Save the cleaned data
    logging.info("Saving the cleaned data")
    output_path = os.path.join(args.input_path, "pupil_positions_cleaned.csv")
    df.to_csv(output_path, index=False)
    logging.info("Done, you can find the cleaned data at %s", output_path)


# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
	import os
	import tkinter as tk
	from tkinter import filedialog
	import logging
	import argparse
	import pandas as pd
	import numpy as np

	# Logging setup: records at INFO level and above are formatted with a
	# timestamp, the logger name and the severity in front of the message.
	# NOTE: the logger returned by getLogger() is not kept; the functions in this
	# file log through the module-level logging.* helpers instead.
	logging.getLogger("remove_confidence_below_threshold")
	logging.basicConfig(
	    level=logging.INFO,
	    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	)


	def get_path():
	    """Ask the user for the Pupil Player export folder through a directory dialog.

	    The chosen folder must contain ``blinks.csv``, ``gaze_positions.csv`` and
	    ``pupil_positions.csv``.

	    Returns:
	        The path of the selected directory.

	    Raises:
	        SystemExit: If the dialog is dismissed without a selection, or if the
	            selected folder lacks any of the required files.
	    """
	    required_files = ("blinks.csv", "gaze_positions.csv", "pupil_positions.csv")

	    # A hidden root window hosts the dialog; destroy it afterwards so the Tk
	    # interpreter is not left alive for the rest of the run.
	    root = tk.Tk()
	    root.withdraw()
	    try:
	        # NOTE: a macOS-only ``message`` option was sketched here before but
	        # was never enabled; only the title is passed.
	        path = filedialog.askdirectory(title="Select the export directory from Player")
	    finally:
	        root.destroy()

	    # An empty result means the dialog was cancelled. Joining an empty path
	    # with the file names would validate the current working directory instead.
	    if not path:
	        error = "No folder was selected."
	        logging.error(error)
	        raise SystemExit(error)

	    # Check that the folder contains the required files.
	    if not all(os.path.exists(os.path.join(path, name)) for name in required_files):
	        error = "The selected folder does not contain pupil, blinks and gaze positions data. "
	        logging.error(error)
	        raise SystemExit(error)
	    return path


	def _remove_blinks(df, blinks_df):
	    """Return *df* without the samples that lie strictly inside any blink interval."""
	    timestamps = df["pupil_timestamp"].to_numpy()
	    # Accumulate one mask over all blinks so the frame is filtered only once.
	    in_blink = np.zeros(len(df), dtype=bool)
	    for start, end in zip(blinks_df["start_timestamp"], blinks_df["end_timestamp"]):
	        in_blink |= (timestamps > start) & (timestamps < end)
	    return df[~in_blink]


	def _log_blink_settings(export_dir, blink_count):
	    """Log the settings found in blink_detection_report.csv, when that file exists."""
	    report_path = os.path.join(export_dir, "blink_detection_report.csv")
	    if not os.path.exists(report_path):
	        # The report only feeds the log message, so its absence is not fatal.
	        logging.warning(
	            "blink_detection_report.csv not found, the blink detection "
	            "thresholds cannot be reported"
	        )
	        logging.info("Removing %s blinks", blink_count)
	        return

	    report = pd.read_csv(report_path)
	    settings = dict(zip(report["key"], report["value"]))
	    logging.info(
	        "Removing %s blinks, these blinks were detected with onset confidence "
	        "threshold of %s and offset confidence threshold of %s, if you feel "
	        "some blinks are missing, try tuning the thresholds in Player",
	        settings.get("blinks_exported", blink_count),
	        settings.get("onset_confidence_threshold", "unknown"),
	        settings.get("offset_confidence_threshold", "unknown"),
	    )


	def main():
	    """Filter the pupil data of a Pupil Player export and save a cleaned copy.

	    Command-line arguments:
	        --input_path: Export folder. When it is omitted or does not exist, a
	            directory dialog is opened through ``get_path``.
	        --confidence_threshold: Samples are kept only when both ``confidence``
	            and ``model_confidence`` are strictly above this value
	            (default 0.6).

	    Steps: drop the samples whose timestamp lies strictly inside a blink from
	    ``blinks.csv``, drop the samples whose method is ``2d c++``, drop the
	    low-confidence samples, keep a fixed subset of columns, sort by eye and
	    timestamp, and write ``pupil_positions_cleaned.csv`` into the export
	    folder.
	    """
	    # Parse input arguments
	    parser = argparse.ArgumentParser(
	        description="Filter data from Pupil Player export for pupillometry"
	    )
	    parser.add_argument("--input_path", default=None, type=str)
	    parser.add_argument(
	        "--confidence_threshold",
	        default=0.6,
	        type=float,
	        help="keep samples whose confidence and model confidence exceed this value",
	    )
	    args = parser.parse_args()

	    # If no usable path is provided, open a UI to select the folder
	    if args.input_path is None or not os.path.exists(args.input_path):
	        args.input_path = get_path()

	    # Only the pupil and blink tables are needed for the filtering below.
	    pupil_df = pd.read_csv(os.path.join(args.input_path, "pupil_positions.csv"))
	    blinks_df = pd.read_csv(os.path.join(args.input_path, "blinks.csv"))

	    # Remove blinks from pupil_positions, using the timestamps from blinks.csv
	    _log_blink_settings(args.input_path, len(blinks_df))
	    df = _remove_blinks(pupil_df, blinks_df)

	    # Remove method == 2d c++
	    logging.info("Removing 2d c++ method from the data")
	    df = df[df["method"] != "2d c++"]

	    # Remove samples whose confidence or model confidence is not above the threshold
	    threshold = args.confidence_threshold
	    logging.info(
	        "Removing pupil positions with confidence or model confidence not above %s",
	        threshold,
	    )
	    df = df[(df["confidence"] > threshold) & (df["model_confidence"] > threshold)]

	    # Keep only the columns listed here; comment this section out to keep all columns
	    logging.info("Cleaning up some columns")
	    useful_columns = [
	        "pupil_timestamp",
	        "world_index",
	        "eye_id",
	        "diameter_3d",
	        "diameter",
	        "confidence",
	        "method",
	        "model_id",
	        "model_confidence",
	    ]
	    df = df[useful_columns]
	    logging.info("Summary \n %s", df.describe().transpose())

	    # Order the samples by eye, then chronologically within each eye
	    logging.info("Sorting by eye_id")
	    df = df.sort_values(by=["eye_id", "pupil_timestamp"])

	    # Report mean, max and min of confidence and 3d diameter for each eye
	    logging.info("Printing the mean confidence and mean diameter for each eye")
	    logging.info(
	        "Eye mean confidence and diameter in mm: \n %s",
	        df.groupby("eye_id")[["confidence", "diameter_3d"]].agg(["mean", "max", "min"]),
	    )

	    # Save the cleaned data
	    logging.info("Saving the cleaned data")
	    output_path = os.path.join(args.input_path, "pupil_positions_cleaned.csv")
	    df.to_csv(output_path, index=False)
	    logging.info("Done, you can find the cleaned data at %s", output_path)


	# Run the filter only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()