Skip to content

Instantly share code, notes, and snippets.

@fuwiak
Last active August 23, 2019 18:52
Show Gist options
  • Save fuwiak/2b1f598dc4b577e91c3294579025bb5f to your computer and use it in GitHub Desktop.
Save fuwiak/2b1f598dc4b577e91c3294579025bb5f to your computer and use it in GitHub Desktop.
import ast  # ast.literal_eval parses a Python literal (dict, list, number, ...) from a string; it does not execute code

# Keep only the salary cells that are present (NA entries of the salary column are dropped).
salaries = df.salary.dropna()

# Parse every cell exactly once and reuse the result below.
# NOTE(review): assumes each cell is the string form of a dict with a 'currency'
# key (plus 'from'/'to' bounds used later) -- confirm against the data source.
_parsed_salaries = [ast.literal_eval(raw) for raw in salaries]

currencies = [entry['currency'] for entry in _parsed_salaries]
curr = set(currencies)  # distinct currency codes, e.g. {'EUR', 'RUR', 'USD'}

# Split the parsed salary records by currency.
rur = [entry for entry in _parsed_salaries if entry['currency'] == 'RUR']
eur = [entry for entry in _parsed_salaries if entry['currency'] == 'EUR']
usd = [entry for entry in _parsed_salaries if entry['currency'] == 'USD']
import numpy as np

# Lower / upper salary bounds that are actually present in the RUR records.
fr = [x['from'] for x in rur if x['from'] is not None]  # e.g. [0, 100, 200, ...]
to = [x['to'] for x in rur if x['to'] is not None]  # e.g. [100, 200, 300, ...]

# Pair the bounds per record. Zipping the two independently filtered lists
# above would shift them against each other as soon as one record lacks a
# bound, matching the lower bound of one vacancy with the upper bound of
# another (and silently truncating the longer list).
# A record with a single known bound contributes that bound as its estimate;
# a record with no bounds at all is skipped.
salary_range = [
    tuple(bound for bound in (x['from'], x['to']) if bound is not None)
    for x in rur
]
salary_range = [bounds for bounds in salary_range if bounds]  # e.g. [(0, 100), (100,), (200, 300), ...]

# Midpoint of each record's range, e.g. [50.0, 100.0, 250.0, ...]
midpoints = [np.mean(bounds) for bounds in salary_range]

# Average of the midpoints, rounded to one decimal place; NaN when there is
# no usable RUR salary (avoids numpy's "mean of empty slice" warning).
av = round(np.mean(midpoints), 1) if midpoints else float('nan')
print("average salary as Data Scientist ", av, "rubles")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment