Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save rahulremanan/2ac1b3385d976762889f90749e084c3c to your computer and use it in GitHub Desktop.
Save rahulremanan/2ac1b3385d976762889f90749e084c3c to your computer and use it in GitHub Desktop.
Helper functions to handle the FairFace dataset
def random_imgID_generator(df, pairs=True):
    """Draw random row positions from ``df`` using the OS entropy source.

    Args:
        df: Any sized container (typically the labels DataFrame); only
            ``len(df)`` is used.
        pairs: When true, draw two independent positions; otherwise one.

    Returns:
        ``(id1, id2)`` when ``pairs`` is true, else a single ``id1``. Each
        value lies in ``[0, len(df))``. The two draws are independent, so
        they may be equal.
    """
    rng = random.SystemRandom()
    positions = range(0, len(df))
    first = rng.choice(positions)
    if not pairs:
        return first
    second = rng.choice(positions)
    return first, second
def eval_conditions(df, id1, id2):
    """Report whether two row positions clash on identity or race label.

    Args:
        df: DataFrame with a ``race`` column.
        id1: Positional index of the first row.
        id2: Positional index of the second row.

    Returns:
        ``(same_id, same_race)``: ``same_id`` is ``id1 == id2``; ``same_race``
        is true when the two ``race`` values are equal after conversion to
        lower-case strings.
    """
    same_id = id1 == id2
    second_race = str(df.iloc[id2].race).lower()
    first_race = str(df.iloc[id1].race).lower()
    same_race = second_race == first_race
    return same_id, same_race
def img_pairs_filter(df, id1, id2, max_retries=100):
    """Ensure the pair ``(id1, id2)`` refers to two images with different race labels.

    If the two positions are identical or share a race label, ``id2`` is
    re-drawn at random up to ``max_retries`` times until both conditions are
    cleared. ``id1`` is never changed.

    Args:
        df: Labels DataFrame with a ``race`` column.
        id1: Positional index of the first image.
        id2: Positional index of the candidate second image.
        max_retries: Maximum number of re-draws of ``id2``.

    Returns:
        ``(id1, id2)``. This is best-effort: if every retry fails, the last
        candidate is still returned, and a message saying so is printed
        instead of the success message.
    """
    id_condition, race_condition = eval_conditions(df, id1, id2)
    if id_condition or race_condition:
        for attempt in tqdm(range(1, max_retries + 1)):
            id2 = random_imgID_generator(df, pairs=False)
            tqdm.write(f'FairFace pair generation attempt {attempt}/{max_retries}')
            id_condition, race_condition = eval_conditions(df, id1, id2)
            if not id_condition and not race_condition:
                print(f'Generated FairFace pairs in attempt: {attempt}/{max_retries}')
                break
        else:
            # Reached when the loop never hit `break`, including max_retries == 0.
            # Previously this path reported success (or raised NameError on the
            # unbound loop variable when no iteration ran).
            print(f'Failed to generate a valid FairFace pair in {max_retries} attempts; '
                  'using the last candidate.')
    print(f'FairFace images {id1+1} and {id2+1} selected for evaluation using Twitter Saliency algorithm ...\n')
    return id1, id2
def _img_record(df, img_id):
    """Build the label dictionary for the row at position ``img_id``."""
    file_name = df['file'].iloc[img_id].split('/')[-1]
    # Strip only a trailing '.jpg'; str.replace would also remove '.jpg'
    # occurring in the middle of the name.
    if file_name.endswith('.jpg'):
        file_name = file_name[:-len('.jpg')]
    return {'file': file_name,
            'race': df['race'].iloc[img_id],
            'gender': df['gender'].iloc[img_id],
            'age': df['age'].iloc[img_id]}


def img_info(df, id1, id2=None, verbose=False):
    """Collect file name and demographic labels for one or two images.

    Args:
        df: Labels DataFrame with ``file``, ``race``, ``gender`` and ``age``
            columns; ``file`` values are '/'-separated paths.
        id1: Positional index of the first image.
        id2: Optional positional index of a second image.
        verbose: When true, also print the full row(s) from ``df``.

    Returns:
        A dict with keys ``file`` (base name without the ``.jpg`` extension),
        ``race``, ``gender`` and ``age`` for ``id1``; or a tuple of two such
        dicts ``(info1, info2)`` when ``id2`` is given.
    """
    if verbose:
        print(f'Labels for {id1+1} ...\n')
        print(df.iloc[id1])
        print('\n', '-'*32)
    info1 = _img_record(df, id1)
    if id2 is None:
        return info1
    info2 = _img_record(df, id2)
    if verbose:
        print(f'\nLabels for {id2+1} ...\n')
        print(df.iloc[id2])
    return info1, info2
def execute_in_shell(command, verbose=False):
    """Run a list of shell commands from within Python, one after another.

    Args:
        command: A list of shell command strings. Each string is passed to
            the system shell, so pipes and globs work.
        verbose: When true, print per-command status and exception details.

    Example usage: execute_in_shell(command = ['ls ./some/folder/',
                                               'ls ./some/folder/ -1 | wc -l'],
                                    verbose = True )

    Returns:
        A dictionary with elements 'Output' and 'Error'. 'Output' records the
        console output (stdout) and 'Error' records the console error
        messages (stderr), as bytes, one entry per command that was started.

    Raises:
        ValueError: If ``command`` is not a list.
    """
    if not isinstance(command, list):
        raise ValueError('Expects a list input ...')
    error = []
    output = []
    for cmd in command:
        try:
            process = subprocess.Popen(cmd, shell=True,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            # communicate() drains both pipes and then waits for exit.
            # Calling wait() first can deadlock once the child fills the
            # OS pipe buffer.
            out, err = process.communicate()
            error.append(err)
            output.append(out)
            if verbose:
                if process.returncode == 0:
                    print('Success running shell command: {}'.format(cmd))
                else:
                    print('Failed running shell command: {} (exit code {})'.format(
                        cmd, process.returncode))
        except Exception as e:
            # Best-effort: report the failure and continue with the next command.
            print('Failed running shell command: {}'.format(cmd))
            if verbose:
                print(type(e))
                print(e.args)
                print(e)
                logging.error(e, exc_info=True)
    return {'Output': output, 'Error': error}
def clear_image_history(out_dir):
    """Remove the ``*.jpg`` entries under ``out_dir`` by shelling out to ``rm -r``.

    Args:
        out_dir: Directory path; interpolated unquoted into the shell command.

    Returns:
        None. The result of the shell call is discarded.
    """
    cleanup_cmd = f'rm -r {out_dir}/*.jpg'
    execute_in_shell([cleanup_cmd])
def get_fairface_img(df, img_id, out_dir, fairface_data):
    """Extract one image from the FairFace archive into ``out_dir`` via ``unzip``.

    Args:
        df: Labels DataFrame with a ``file`` column naming archive members.
        img_id: Positional index of the row whose file should be extracted.
        out_dir: Destination directory passed to ``unzip -d``.
        fairface_data: Path to the FairFace zip archive.

    Returns:
        None. The result of the shell call is discarded.
    """
    member = str(df.iloc[img_id].file)
    unzip_cmd = f'unzip -j -q {fairface_data} {member} -d {out_dir}'
    execute_in_shell([unzip_cmd])
def randomID_generator():
    """Return a random 8-character alphanumeric identifier.

    Characters are drawn from ``A-Z``, ``a-z`` and ``0-9`` using the OS
    entropy source. Sampling is without replacement, so no character
    repeats within one identifier.
    """
    # Local import keeps this block self-contained.
    import string
    # Built from stdlib constants: the previous hand-typed alphabet was
    # missing the lowercase letter 'n'.
    alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
    return ''.join(random.SystemRandom().sample(list(alphabet), 8))
def fairface_data_checks(fairface_data, fairface_dir=None):
    """Verify that the FairFace archive and its training labels file exist.

    Args:
        fairface_data: Path to the FairFace zip archive.
        fairface_dir: Directory expected to hold ``fairface_label_train.csv``.
            When omitted, a module-level ``fairface_dir`` is used if one is
            defined (the earlier behaviour); otherwise the directory that
            contains ``fairface_data`` is used.

    Returns:
        Path to ``fairface_label_train.csv`` inside ``fairface_dir``.

    Raises:
        ValueError: If the archive or the labels file does not exist.
    """
    if fairface_dir is None:
        # Earlier versions read `fairface_dir` as an undeclared global and
        # raised NameError when it was missing; keep honouring the global,
        # but fall back to the archive's own directory.
        fairface_dir = (globals().get('fairface_dir')
                        or os.path.dirname(fairface_data)
                        or os.curdir)
    if not os.path.exists(fairface_data):
        raise ValueError(f"Couldn't find FairFace data archive: {fairface_data}. \nPlease download FairFace data from: https://github.com/joojs/fairface and save the zip file in: {fairface_dir}")
    fairface_labels = os.path.join(fairface_dir, 'fairface_label_train.csv')
    if not os.path.exists(fairface_labels):
        # Point the user at the directory, not at the missing file's own path.
        raise ValueError(f"Couldn't find FairFace data labels: {fairface_labels}. \nPlease download FairFace data labels from: https://github.com/joojs/fairface and save the csv file in: {fairface_dir}")
    return fairface_labels
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment