This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the two label columns out of y_train and build a label -> index
# lookup for the first one.
# NOTE(review): the [:, 0] / [:, 1] indexing requires y_train to be a
# rectangular sequence with at least two columns — confirm against the code
# that builds it.
y_train = np.array(y_train)
class_1 = y_train[:, 0]
class_2 = y_train[:, 1]

# Sort the distinct labels so every run assigns the same index to the same
# label. Iterating a bare set of strings is not order-stable across
# interpreter sessions (string hashing is randomized per process), which
# would silently change the label encoding between training and inference.
labels_1 = sorted(set(class_1))
c_1 = len(labels_1)
c_2 = len(set(class_2))
map_1 = {label: index for index, label in enumerate(labels_1)}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| datagen = tf.keras.preprocessing.image.ImageDataGenerator( | |
| featurewise_center=False, # set input mean to 0 over the dataset | |
| samplewise_center=False, # set each sample mean to 0 | |
| featurewise_std_normalization=False, # divide inputs by std of the dataset | |
| samplewise_std_normalization=False, # divide each input by its std | |
| rotation_range = 30, # randomly rotate images in the range (degrees, 0 to 180) | |
| zoom_range = 0.2, # Randomly zoom image | |
| width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) | |
| height_shift_range=0.1, # randomly shift images vertically (fraction of total height) | |
| horizontal_flip = True, # randomly flip images |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| data_dir = 'input/automobile-images-dataset' | |
| list_of_data = [] | |
| img_size = 128 | |
| for fol in os.listdir(data_dir): | |
| for file in os.listdir(os.path.join(data_dir, fol)): | |
| json_dict = {} | |
| img_arr = cv2.imread(os.path.join(os.path.join(data_dir, fol), file))[...,::-1] #convert BGR to RGB format | |
| resized_arr = cv2.resize(img_arr, (img_size, img_size)) # Reshaping images to preferred size | |
| json_dict['image'] = resized_arr | |
| json_dict['class_1'] = fol.split('_')[0] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert the train and test XML folders into CSV files.
xml_to_csv(
    '/kaggle/input/frdata/fracture_data/train/xml/',
    'train.csv',
)
xml_to_csv(
    '/kaggle/input/frdata/fracture_data/test/xml/',
    'test.csv',
)

# Build the training and validation datasets from the generated CSV files
# and the matching image folders.
dataset = Dataset(
    'train.csv',
    '/kaggle/input/frdata/fracture_data/train/images',
)
val_dataset = Dataset(
    'test.csv',
    '/kaggle/input/frdata/fracture_data/test/images',
)

# Wrap the training dataset in a loader: batches of two, shuffled.
loader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Feature columns and target column taken from final_df.
feature_columns = ['w_years', 'e_years', 'certification', 'similarity_match']
df_x = final_df[feature_columns]
df_y = final_df['status']

# Hold out 20% of the rows, shuffled and stratified on the target.
tr_x, te_x, tr_y, te_y = model_selection.train_test_split(
    df_x,
    df_y,
    test_size=0.2,
    stratify=df_y,
    shuffle=True,
)

# Random forest with 20 trees, considering 3 features per split.
rf_mod = ensemble.RandomForestClassifier(
    n_estimators=20,
    n_jobs=10,
    max_features=3,
)
model = rf_mod.fit(tr_x, tr_y)

# Report the score on the held-out rows, then keep the predictions for them.
print(model.score(te_x, te_y))
pred_y = model.predict(te_x)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_best_candidates(query_job_database, candidate_df):
    """Rank candidates by their similarity to a job query.

    Args:
        query_job_database: Object exposing ``similarity(other)``; it is
            called once per candidate row. (Presumably a spaCy ``Doc`` —
            confirm against the caller.)
        candidate_df: DataFrame with an ``nlp_skills`` column whose values
            are passed one by one to ``similarity``.

    Returns:
        The same ``candidate_df`` object, with a ``similarity_match`` column
        added (or overwritten) and its rows sorted by that column in
        descending order. The frame is modified in place.
    """
    # Iterate the column directly rather than indexing row by row with
    # range()/iloc; the values come out in the same positional order.
    candidate_df['similarity_match'] = [
        query_job_database.similarity(skills)
        for skills in candidate_df['nlp_skills']
    ]
    candidate_df.sort_values(by=['similarity_match'], ascending=False, inplace=True)
    return candidate_df
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_year_features(c_df, col): | |
| experience = list(c_df[col]) | |
| exp = [] | |
| for x in experience: | |
| sum_s = [] | |
| for k, v in x.items(): | |
| for K, V in v[4].items(): | |
| s = re.findall('[\d]+', v[4][K]) | |
| s = [int(c) for c in s] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the resume data and drop every row that has a missing value in any
# column.
c_df = pd.read_csv('dataset/resume_jobs.csv')
c_df.dropna(inplace=True, axis=0)

# Keep only the columns used below. The explicit copy makes the column
# assignments that follow write to an independent frame instead of a
# selection taken from the full one (avoids chained-assignment problems).
c_df = c_df[
    ['Resume_title', 'City', 'work_experiences', 'Skills', 'Educations', 'Certificates']
].copy()

# Parse the text cells of these columns back into Python objects.
# No per-cell "is missing" filter is applied: dropna() above already removed
# incomplete rows, and filtering inside the list would make it shorter than
# the frame and break the assignment.
# NOTE(security): eval() executes arbitrary expressions found in the CSV.
# This is only acceptable for a trusted file; if the cells are plain
# literals, ast.literal_eval is the safe replacement — confirm the data
# format before switching.
dict_columns = ['Educations', 'Certificates', 'work_experiences']
for column in dict_columns:
    c_df[column] = [eval(cell) for cell in c_df[column]]
c_df['skills'] = [eval(cell) for cell in c_df['Skills']]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the job listings and give the three columns fixed names.
job_df = pd.read_csv('dataset/n-jobs.csv')
job_df.columns = ['s.n', 'title', 'skills']

# Keep title and skills, and drop incomplete rows BEFORE transforming them.
# Once str(x) has run, a missing skills value becomes the string 'nan' and a
# later dropna() can no longer detect it. The copy gives the assignments
# below an independent frame to write to.
job_df = job_df[['title', 'skills']].dropna(axis=0).copy()

# Split the '|'-separated skills text into a list of skills.
job_df['skills'] = job_df['skills'].apply(lambda x: str(x).split('|'))

# Run the space-joined skills through nlp and store the result per row.
job_df['nlp_doc'] = job_df['skills'].apply(lambda x: nlp((' ').join(x)))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_best_candidates(num, query_job_database):
    """Return the top ``num`` candidates ranked by similarity to a job query.

    Works on the module-level ``candidate_df``: a ``similarity_match`` column
    is added (or overwritten) on it and the frame is re-sorted in place, so
    the global stays sorted by the most recent query after the call.

    Args:
        num: Number of leading rows to return after sorting.
        query_job_database: Object exposing ``similarity(other)``; it is
            called once per row of ``candidate_df['nlp_doc_skills']``.
            (Presumably a spaCy ``Doc`` — confirm against the caller.)

    Returns:
        A DataFrame with the ``name``, ``nlp_doc_skills`` and
        ``similarity_match`` columns of the first ``num`` rows, in
        descending ``similarity_match`` order.
    """
    # Iterate the column directly rather than indexing row by row with
    # range()/iloc; the values come out in the same positional order.
    candidate_df['similarity_match'] = [
        query_job_database.similarity(skills_doc)
        for skills_doc in candidate_df['nlp_doc_skills']
    ]
    candidate_df.sort_values(by=['similarity_match'], ascending=False, inplace=True)
    return candidate_df[['name', 'nlp_doc_skills', 'similarity_match']].iloc[:num]