Skip to content

Instantly share code, notes, and snippets.

View Vishnunkumar's full-sized avatar
😉
Exploring Dev

Vishnu Nandakumar Vishnunkumar

😉
Exploring Dev
View GitHub Profile
# Split the two label columns out of y_train and build an integer encoding
# for the first one.
y_train = np.array(y_train)
class_1 = y_train[:, 0]  # first label column
class_2 = y_train[:, 1]  # second label column

# Distinct label values per column. They are sorted so that the
# label -> index mapping below is the same on every interpreter run; if the
# labels are strings, the iteration order of a bare set changes between runs
# because of hash randomization.
labels_1 = sorted(set(class_1))
labels_2 = sorted(set(class_2))
c_1 = len(labels_1)
c_2 = len(labels_2)

# Map every distinct class_1 label to an integer id in 0..c_1-1.
map_1 = {label: index for index, label in enumerate(labels_1)}
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
featurewise_center=False, # set input mean to 0 over the dataset
samplewise_center=False, # set each sample mean to 0
featurewise_std_normalization=False, # divide inputs by std of the dataset
samplewise_std_normalization=False, # divide each input by its std
rotation_range = 30, # randomly rotate images in the range (degrees, 0 to 180)
zoom_range = 0.2, # Randomly zoom image
width_shift_range=0.1, # randomly shift images horizontally (fraction of total width)
height_shift_range=0.1, # randomly shift images vertically (fraction of total height)
horizontal_flip = True, # randomly flip images
# Walk every sub-folder of data_dir and load each file in it as a resized
# RGB image; the sub-folder name supplies the 'class_1' label.
data_dir = 'input/automobile-images-dataset'
list_of_data = []
img_size = 128
for fol in os.listdir(data_dir):
    for file in os.listdir(os.path.join(data_dir, fol)):
        json_dict = {}
        img_arr = cv2.imread(os.path.join(data_dir, fol, file))
        if img_arr is None:
            # cv2.imread returns None (it does not raise) when a file cannot
            # be read as an image; skip it instead of crashing on the slice.
            continue
        img_arr = img_arr[..., ::-1]  # convert BGR to RGB format
        resized_arr = cv2.resize(img_arr, (img_size, img_size))  # Reshaping images to preferred size
        json_dict['image'] = resized_arr
        json_dict['class_1'] = fol.split('_')[0]  # text before the first '_' of the folder name
        # NOTE(review): the visible snippet ends here; json_dict is not yet
        # stored in list_of_data by any line shown.
# Convert the XML folder of each split into its CSV file.
for xml_dir, csv_path in (
    ('/kaggle/input/frdata/fracture_data/train/xml/', 'train.csv'),
    ('/kaggle/input/frdata/fracture_data/test/xml/', 'test.csv'),
):
    xml_to_csv(xml_dir, csv_path)

# Model training: build the train and validation datasets from the CSV files
# and their image folders.
dataset = Dataset('train.csv', '/kaggle/input/frdata/fracture_data/train/images')
val_dataset = Dataset('test.csv', '/kaggle/input/frdata/fracture_data/test/images')

# Wrap the training dataset in a shuffling loader with batch_size=2.
loader = DataLoader(dataset, shuffle=True, batch_size=2)
# Feature matrix and target taken from final_df.
feature_columns = ['w_years', 'e_years', 'certification', 'similarity_match']
df_x = final_df[feature_columns]
df_y = final_df['status']

# Shuffled 80/20 split, stratified on the target column.
tr_x, te_x, tr_y, te_y = model_selection.train_test_split(
    df_x,
    df_y,
    test_size=0.2,
    shuffle=True,
    stratify=df_y,
)

# Fit a 20-tree random forest; fit() returns the estimator, bound to `model`.
rf_mod = ensemble.RandomForestClassifier(n_estimators=20, max_features=3, n_jobs=10)
model = rf_mod.fit(tr_x, tr_y)

# Report the score on the held-out split, then keep its predictions.
test_score = model.score(te_x, te_y)
print(test_score)
pred_y = model.predict(te_x)
def get_best_candidates(query_job_database, candidate_df):
    """Rank candidates by their similarity to a job query.

    Args:
        query_job_database: Object exposing ``similarity(other)``; it is
            called once per candidate row (presumably a spaCy ``Doc`` --
            confirm against the caller).
        candidate_df: DataFrame with an ``nlp_skills`` column whose values
            are passed to ``similarity``.

    Returns:
        The same ``candidate_df`` object, modified in place: a
        ``similarity_match`` column is added (or overwritten) and the rows
        are sorted by that column in descending order.
    """
    # Iterate the column directly instead of range()/.iloc positional lookups.
    candidate_df['similarity_match'] = [
        query_job_database.similarity(skills)
        for skills in candidate_df['nlp_skills']
    ]
    candidate_df.sort_values(by=['similarity_match'], ascending=False, inplace=True)
    return candidate_df
def get_year_features(c_df, col):
experience = list(c_df[col])
exp = []
for x in experience:
sum_s = []
for k, v in x.items():
for K, V in v[4].items():
s = re.findall('[\d]+', v[4][K])
s = [int(c) for c in s]
# Load the resume dataset and turn its stringified Python values back into
# objects.
c_df = pd.read_csv('dataset/resume_jobs.csv')
c_df.dropna(inplace=True, axis=0)  # drop every row that has a missing value
# .copy() so the column assignments below write to an independent frame
# rather than to a selection of the original (avoids SettingWithCopyWarning).
c_df = c_df[['Resume_title', 'City', 'work_experiences', 'Skills', 'Educations', 'Certificates']].copy()
dict_columns = ['Educations', 'Certificates', 'work_experiences']
# SECURITY NOTE(review): eval() executes whatever expression a CSV cell
# contains. That is only acceptable for a trusted file; if the cells are
# plain literals, ast.literal_eval would be the safe replacement.
for x in dict_columns:
    # Rows with missing values were dropped above, so no per-cell None filter
    # is applied; a filter that removed anything would make the list shorter
    # than the column and the assignment would fail.
    c_df[x] = [eval(y) for y in c_df[x]]
c_df['skills'] = [eval(c) for c in c_df['Skills']]
# Load the job postings and build one NLP document per posting from its
# '|'-separated skills.
job_df = pd.read_csv('dataset/n-jobs.csv')
job_df.columns = ['s.n', 'title', 'skills']
# Drop incomplete rows BEFORE the str() conversion below: str() turns a
# missing skills cell into the text 'nan', which dropna can no longer detect.
# .copy() makes the following column assignments write to an independent
# frame instead of a selection of the original.
job_df = job_df[['title', 'skills']].dropna(axis=0).copy()
job_df['skills'] = job_df['skills'].apply(lambda x: str(x).split('|'))
job_df['nlp_doc'] = job_df['skills'].apply(lambda x: nlp(' '.join(x)))
def get_best_candidates(num, query_job_database):
    """Return the ``num`` candidates most similar to a job query.

    Reads and modifies the module-level ``candidate_df``: a
    ``similarity_match`` column is added (or overwritten) and the frame is
    sorted in place by that column, highest first.

    Args:
        num: Number of top rows to return.
        query_job_database: Object exposing ``similarity(other)``; it is
            called once with each value of the ``nlp_doc_skills`` column
            (presumably a spaCy ``Doc`` -- confirm against the caller).

    Returns:
        A DataFrame holding the ``name``, ``nlp_doc_skills`` and
        ``similarity_match`` columns of the first ``num`` rows after sorting.
    """
    # Iterate the column directly instead of range()/.iloc positional lookups.
    candidate_df['similarity_match'] = [
        query_job_database.similarity(doc)
        for doc in candidate_df['nlp_doc_skills']
    ]
    candidate_df.sort_values(by=['similarity_match'], ascending=False, inplace=True)
    return candidate_df[['name', 'nlp_doc_skills', 'similarity_match']].iloc[:num]