Skip to content

Instantly share code, notes, and snippets.

View Momotoculteur's full-sized avatar

Bastien MAURICE Momotoculteur

View GitHub Profile
@Momotoculteur
Momotoculteur / preprocessData1
Last active May 31, 2019 12:05
tuto Recommandeur hashtag image
# Build the "path,labels" table from two parallel files: data_list.txt is read
# as the image-path column and tag_list.txt as the space-separated tag column.
# NOTE(review): both files are read with pandas' default header inference, so
# their first line becomes a column name instead of a data row -- confirm the
# files really start with a header line, otherwise the first sample is lost.
df = pd.read_csv('./HARRISON/data_list.txt')
df2 = pd.read_csv('./HARRISON/tag_list.txt')

# Place the two single-column frames side by side (row i of one is paired with
# row i of the other) and give the columns their final names.
df3 = pd.concat([df, df2], axis=1)
df3.columns = ["path", "labels"]

for index, row in tqdm(df3.iterrows(), total=df3.shape[0]):
    # Split on spaces and drop the empty pieces before joining with commas.
    # This removes a trailing separator when there is one, without chopping
    # the last character of the final tag when there is none, and it keeps
    # repeated spaces from producing empty tags.
    temp = ",".join(tag for tag in row['labels'].split(" ") if tag)
    df3.at[index, 'labels'] = temp

df3.to_csv('./HARRISON/dataTest.txt', header=["path", "labels"], index=None, sep=',', mode='w')
# Rewrite every line of vocab_index.txt with its spaces replaced by single
# commas, and save the result as listClass.txt.
# Each input line is loaded as one string in the 'classe' column.
colnames = ['classe']
df = pd.read_csv('./HARRISON/vocab_index.txt', names=colnames, header=None)

# Collapse runs of commas only. A generic "repeated character" pattern such as
# (.)\1+ would also rewrite the data itself ("happy" -> "hapy", "100" -> "10"),
# corrupting both class names and indices.
pattern = reg.compile(r",{2,}")

for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    # Every space becomes a comma, then each run of commas shrinks to one.
    temp = pattern.sub(",", row['classe'].replace(" ", ","))
    print(temp)
    df.at[index, 'classe'] = temp

df.to_csv('./HARRISON/listClass.txt', header=["classe"], index=None, sep=',', mode='w')
from PIL import ImageFile
# Ask PIL to load image files whose data is truncated.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# --- Paths -------------------------------------------------------------------
DIRECTORY_DATA = "./HARRISON/"
DIRECTORY_TRAINED_MODEL = './trainedModel/model.hdf5'

# --- Training settings -------------------------------------------------------
NB_CLASSES = 994  # Hard-coded class count, to account for classes missing from the dataset
NB_EPOCH = 1
BATCH_SIZE = 32
SHUFFLE = True
IMG_SIZE = (96, 96)
TRAINSIZE_RATIO = 0.8
LIST_CLASS = []

# --- Data --------------------------------------------------------------------
# Class list, read as two columns: "classe" and "index".
CLASSE = pd.read_csv("./HARRISON/listClass.txt", sep=',', names=["classe", "index"])

# Sample table; the comma-joined "labels" string of every row is turned into a
# Python list of individual labels.
df = pd.read_csv("./HARRISON/data.txt")
df["labels"] = df["labels"].apply(lambda tags: tags.split(","))

# Number of leading rows used for training (TRAINSIZE_RATIO of the table).
TRAINSIZE = int(len(df) * TRAINSIZE_RATIO)
# Checkpoint callback: writes the full model (not only the weights) to
# DIRECTORY_TRAINED_MODEL, keeping only the best result according to 'val_acc'.
save_model_callback = ModelCheckpoint(
    DIRECTORY_TRAINED_MODEL,
    monitor='val_acc',
    mode='auto',
    save_best_only=True,
    save_weights_only=False,
    period=1,
    verbose=1,
)

# Early-stopping callback: also watches 'val_acc', with a patience of 3 and no
# minimum improvement threshold.
early_stopping = EarlyStopping(
    monitor='val_acc',
    mode='auto',
    min_delta=0,
    patience=3,
    verbose=1,
)
# Image generators; both only rescale pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1./255.)
# NOTE(review): test_datagen is not used anywhere in the visible code --
# presumably a validation generator was meant to be built from it; confirm.
test_datagen = ImageDataGenerator(rescale=1./255.)

# Training batches come from the first TRAINSIZE rows of df: image files are
# looked up under DIRECTORY_DATA through the "path" column, and targets are
# taken from the "labels" column.
# NOTE(review): the call was left without its closing parenthesis, which made
# the following statements part of the argument list (a syntax error). It is
# closed here right after target_size; verify that no further arguments were
# lost with it.
train_generator = datagen.flow_from_dataframe(
    dataframe=df[:TRAINSIZE],
    directory=DIRECTORY_DATA,
    x_col="path",
    y_col="labels",
    batch_size=BATCH_SIZE,
    seed=42,
    shuffle=SHUFFLE,
    class_mode="categorical",
    classes=LIST_CLASS,
    target_size=IMG_SIZE,
)
# Save the generator's class_indices mapping to ./classIndice.txt as JSON.
labels = train_generator.class_indices
with open('./classIndice.txt', 'w') as class_index_file:
    json.dump(labels, class_index_file)
# MobileNetV2 backbone: 'imagenet' weights, built without its top layers
# (include_top=False), fed 96x96 RGB inputs, with 'max' pooling on the output.
baseModel = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(96, 96, 3),
    input_tensor=None,
    alpha=1.0,
    pooling='max',
)