Created
August 30, 2021 19:59
-
-
Save rahulremanan/2ac1b3385d976762889f90749e084c3c to your computer and use it in GitHub Desktop.
Helper functions to handle the FairFace dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def random_imgID_generator(df, pairs=True):
    """Draw one or two random row positions for ``df``.

    Positions are picked from ``0 .. len(df) - 1`` with
    ``random.SystemRandom``. Only ``len(df)`` is read from ``df``.

    Args:
        df: Sized container whose length bounds the drawn positions.
        pairs: When true, draw two positions independently (they may be
            equal); otherwise draw a single position.

    Returns:
        ``(id1, id2)`` when ``pairs`` is true, else just ``id1``.
    """
    positions = range(0, len(df))
    first = random.SystemRandom().choice(positions)
    if not pairs:
        return first
    second = random.SystemRandom().choice(positions)
    return first, second
def eval_conditions(df, id1, id2):
    """Report whether two row positions clash as an evaluation pair.

    Args:
        df: Object supporting ``df.iloc[pos].race`` lookups.
        id1: Row position of the first image.
        id2: Row position of the second image.

    Returns:
        ``(same_id, same_race)``: ``same_id`` is true when both positions
        are equal; ``same_race`` is true when the two ``race`` labels
        match after converting to lower-case strings.
    """
    same_id = id1 == id2
    # The second row is looked up first, matching the original evaluation order.
    race_second = str(df.iloc[id2].race).lower()
    race_first = str(df.iloc[id1].race).lower()
    same_race = race_second == race_first
    return same_id, same_race
def img_pairs_filter(df, id1, id2, max_retries=100):
    """Ensure an image pair has distinct positions and distinct race labels.

    If the incoming pair shares a position or a race label (as reported by
    ``eval_conditions``), ``id2`` is re-drawn with
    ``random_imgID_generator`` up to ``max_retries`` times until the pair
    is acceptable.

    Args:
        df: Label table passed through to ``eval_conditions`` and
            ``random_imgID_generator``.
        id1: Row position of the first image (never changed).
        id2: Row position of the second image (replaced when re-drawn).
        max_retries: Maximum number of re-draws; values <= 0 mean no
            re-draw is attempted.

    Returns:
        ``(id1, id2)``. When every retry fails, the last candidate is
        still returned (best effort) and a failure message is printed.
    """
    id_condition, race_condition = eval_conditions(df, id1, id2)
    if id_condition or race_condition:
        for attempt in tqdm(range(1, max_retries + 1)):
            id2 = random_imgID_generator(df, pairs=False)
            tqdm.write(f'FairFace pair generation attempt {attempt}/{max_retries}')
            id_condition, race_condition = eval_conditions(df, id1, id2)
            if not id_condition and not race_condition:
                # Report success only when a valid pair was actually found.
                print(f'Generated FairFace pairs in attempt: {attempt}/{max_retries}')
                break
        else:
            # Reached when retries run out or max_retries <= 0; no loop
            # variable is referenced here, so an empty loop cannot fail.
            print(f'Failed to generate a valid FairFace pair in {max_retries} attempts; using the last candidate.')
    print(f'FairFace images {id1+1} and {id2+1} selected for evaluation using Twitter Saliency algorithm ...\n')
    return id1, id2
def img_info(df, id1, id2=None, verbose=False):
    """Collect the label fields for one or two images.

    Args:
        df: Table with ``file``, ``race``, ``gender`` and ``age`` columns,
            indexed by position through ``.iloc``.
        id1: Row position of the first image.
        id2: Optional row position of a second image.
        verbose: When true, also print the full row for each image.

    Returns:
        A dict with keys ``file``, ``race``, ``gender`` and ``age`` for
        ``id1``; when ``id2`` is given, a tuple of two such dicts.
        ``file`` is the last ``/``-separated component of the stored path
        with every ``.jpg`` occurrence removed.
    """
    def _labels(pos):
        # Build the label summary for a single row position.
        stem = df['file'].iloc[pos].split('/')[-1].replace('.jpg', '')
        return {
            'file': stem,
            'race': df['race'].iloc[pos],
            'gender': df['gender'].iloc[pos],
            'age': df['age'].iloc[pos],
        }

    if verbose:
        print(f'Labels for {id1+1} ...\n')
        print(df.iloc[id1])
        print('\n', '-'*32)
    first = _labels(id1)

    if id2 is None:
        return first

    second = _labels(id2)
    if verbose:
        print(f'\nLabels for {id2+1} ...\n')
        print(df.iloc[id2])
    return first, second
def execute_in_shell(command, verbose=False):
    """Run shell commands from within Python and collect their output.

    Each entry of ``command`` is handed to the system shell unchanged
    (``shell=True``), so callers must quote any untrusted or
    space-containing values themselves.

    Args:
        command: List of shell command strings, run one after another.
        verbose: When true, print a line for each command that ran and
            extra details about any exception raised while running one.

    Returns:
        A dict with two lists, ``'Output'`` (captured stdout) and
        ``'Error'`` (captured stderr), each holding one ``bytes`` entry
        per command that was launched.

    Raises:
        ValueError: If ``command`` is not a list.

    Example:
        execute_in_shell(command=['ls ./some/folder/',
                                  'ls ./some/folder/ -1 | wc -l'],
                         verbose=True)
    """
    import sys

    if not isinstance(command, list):
        raise ValueError('Expects a list input ...')

    error = []
    output = []
    for cmd in command:
        try:
            process = subprocess.Popen(cmd,
                                       shell=True,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            # communicate() drains both pipes while waiting for exit.
            # Calling wait() first can deadlock once the child fills the
            # OS pipe buffer.
            out, err = process.communicate()
            if err:
                # Echo captured error text so failures stay visible on the console.
                sys.stderr.write(err.decode(errors='replace'))
            error.append(err)
            output.append(out)
            if verbose:
                print('Success running shell command: {}'.format(cmd))
        except Exception as e:
            print('Failed running shell command: {}'.format(cmd))
            if verbose:
                print(type(e))
                print(e.args)
                print(e)
                # logging.error() returns None, so it is called, not printed.
                logging.error(e, exc_info=True)
    return {'Output': output, 'Error': error}
def clear_image_history(out_dir):
    """Remove every ``*.jpg`` entry directly inside ``out_dir``.

    The removal is delegated to ``rm -r`` through ``execute_in_shell``;
    nothing is returned.

    Args:
        out_dir: Directory whose ``.jpg`` entries are deleted.
    """
    import os
    import shlex

    # Quote the directory so spaces or shell metacharacters in the path are
    # taken literally. The glob stays outside the quotes so the shell still
    # expands it. expanduser() keeps a leading "~" working, because quoting
    # would otherwise suppress the shell's own tilde expansion.
    quoted_dir = shlex.quote(os.path.expanduser(out_dir))
    _ = execute_in_shell([f'rm -r {quoted_dir}/*.jpg'])
def get_fairface_img(df, img_id, out_dir, fairface_data):
    """Extract one image from the FairFace zip archive into ``out_dir``.

    The archive member name is read from the ``file`` field of row
    ``img_id`` and extracted with ``unzip -j -q`` through
    ``execute_in_shell``; nothing is returned.

    Args:
        df: Label table supporting ``df.iloc[img_id].file``.
        img_id: Row position of the image to extract.
        out_dir: Destination directory handed to ``unzip -d``.
        fairface_data: Path of the FairFace zip archive.
    """
    import os
    import shlex

    file_ = str(df.iloc[img_id].file)
    # Quote every interpolated value so paths containing spaces or shell
    # metacharacters reach unzip as single arguments. expanduser() keeps a
    # leading "~" working despite the quoting.
    archive = shlex.quote(os.path.expanduser(fairface_data))
    member = shlex.quote(file_)
    target = shlex.quote(os.path.expanduser(out_dir))
    _ = execute_in_shell([f'unzip -j -q {archive} {member} -d {target}'])
def randomID_generator():
    """Return a random 8-character alphanumeric identifier.

    Characters are drawn with ``random.SystemRandom().sample`` from the
    ASCII letters and digits, i.e. without replacement, so no character
    appears twice in one identifier.

    Returns:
        An 8-character string.
    """
    # Full alphabet: the lowercase run includes 'n'.
    alphabet = list(
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        'abcdefghijklmnopqrstuvwxyz'
        '0123456789'
    )
    return ''.join(random.SystemRandom().sample(alphabet, 8))
def fairface_data_checks(fairface_data, fairface_dir=None):
    """Verify the FairFace archive and label file exist.

    Args:
        fairface_data: Path of the FairFace zip archive.
        fairface_dir: Directory expected to hold
            ``fairface_label_train.csv``. When omitted, a module-level
            ``fairface_dir`` is used if one is defined; otherwise the
            directory containing ``fairface_data`` is used.

    Returns:
        Path of the label file, ``{fairface_dir}/fairface_label_train.csv``.

    Raises:
        ValueError: If the archive or the label file does not exist.
    """
    if fairface_dir is None:
        # Stay compatible with scripts that define a global ``fairface_dir``,
        # but do not fail with NameError when none exists.
        fairface_dir = globals().get('fairface_dir')
        if fairface_dir is None:
            fairface_dir = os.path.dirname(fairface_data) or '.'
    if not os.path.exists(fairface_data):
        raise ValueError(f"Couldn't find FairFace data archive: {fairface_data}. \nPlease download FairFace data from: https://github.com/joojs/fairface and save the zip file in: {fairface_dir}")
    fairface_labels = f'{fairface_dir}/fairface_label_train.csv'
    if not os.path.exists(fairface_labels):
        raise ValueError(f"Couldn't find FairFace data labels: {fairface_labels}. \nPlease download FairFace data labels from: https://github.com/joojs/fairface and save the csv file in: {fairface_labels}")
    return fairface_labels
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment