# Load the label table. Each cell of the "Finding Labels" column holds one or
# more labels joined by "|", e.g. "LabelA|LabelB|LabelC".
data = pd.read_csv(label_file)

# Split every cell into its individual labels (["LabelA", "LabelB", ...]),
# flatten the per-row lists into one label per row, and keep the distinct
# values in sorted order.
# explode() + dropna() is used rather than aggregating with np.concatenate so
# that an individual missing cell or a table with no rows produces an empty /
# partial result instead of raising.
unique_labels = sorted(
    data["Finding Labels"].str.split("|").explode().dropna().unique()
)

print(f"Dataset contains the following labels:\n{unique_labels}")