This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_skillset_dict(resume_names, resume_texts):
    '''Map each resume name to the skill set extracted from its text.

    Every entry of ``resume_texts`` is passed to ``create_skill_set`` first;
    the results are then paired positionally with ``resume_names``. Pairing
    stops at the shorter of the two inputs, and a repeated name keeps the
    skills of its last occurrence.
    '''
    # Extract all skill sets up front, before any pairing with names happens.
    extracted = list(map(create_skill_set, resume_texts))

    skills_by_name = {}
    for candidate, skills in zip(resume_names, extracted):
        skills_by_name[candidate] = skills
    return skills_by_name
def match_skills(vacature_set, cv_set, resume_name):
    '''Report how many of a job offer's skills appear in one resume.

    Prints the match percentage, the required skills and the matched skills.

    Args:
        vacature_set: Set of skills extracted from the job offer.
        cv_set: Dictionary mapping resume names to their skill sets.
        resume_name: Key in ``cv_set`` of the resume to score.

    Returns:
        A ``(resume_name, pct_match)`` tuple, where ``pct_match`` is the
        percentage of job offer skills found in the resume, rounded to zero
        decimals (a float). ``None`` when ``vacature_set`` is empty, because
        no percentage can be computed in that case.

    Raises:
        KeyError: If ``resume_name`` is not a key of ``cv_set`` (only when
            ``vacature_set`` is non-empty).
    '''
    if not vacature_set:
        print('could not extract skills from job offer text')
        return None

    # Look the resume up in the cv_set argument rather than in a module-level
    # dictionary, so the function works with whatever mapping it is handed.
    matched_skills = vacature_set.intersection(cv_set[resume_name])
    pct_match = round(len(matched_skills) / len(vacature_set) * 100, 0)

    print(resume_name + " has a {}% skill match on this job offer".format(pct_match))
    print('Required skills: {} '.format(vacature_set))
    print('Matched skills: {} \n'.format(matched_skills))
    return (resume_name, pct_match)
# Runs before any text is processed below; presumably registers the skill
# patterns with the NLP pipeline (helper defined elsewhere, confirm there).
add_newruler_to_pipeline(skill_pattern_path)

# Load the resumes, then index the extracted skills by candidate name.
resume_texts, resume_names = create_tokenized_texts_list(extension)
skillset_dict = create_skillset_dict(resume_names, resume_texts)

# Example job offer text (string): one entry from the 'Data Scientist' rows.
# Replace this with your own text to score a different job offer.
vacature_text = (
    vacatures_df[vacatures_df['soort_vacature'] == 'Data Scientist']
    .skills
    .iloc[13]
)

# Set of the skills extracted from the job offer text.
vacature_skillset = create_skill_set(nlp(vacature_text))

# One match_skills result per candidate: the candidate name paired with the
# match percentage.
match_pairs = [
    match_skills(vacature_skillset, skillset_dict, name)
    for name in skillset_dict
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment