Skip to content

Instantly share code, notes, and snippets.

View yohann84L's full-sized avatar
🎯
Focusing

Yohann Lereclus yohann84L

🎯
Focusing
View GitHub Profile
def reduce_mem_usage(props):
"""
Code seen on https://www.kaggle.com/jesucristo/fraud-complete-eda
to reduce the memory usage of a dataframe by using the right
dtype for each variable.
/!\ Be careful with its usage, data could be broken afterwards /!\
NaN values are replaced by -1
"""
@yohann84L
yohann84L / get_pdf_without_text.py
Created February 4, 2020 09:04
Remove text from PDF
def get_pdf_without_text(pdf: fitz.Document) -> fitz.Document:
    """Hide the text of every page by patching the page content streams.

    Each content stream of each page is rewritten so that every ``Tm``
    byte sequence is followed by ``3 Tr``. In PDF content-stream syntax
    ``Tr`` selects the text rendering mode, and mode 3 draws text
    invisibly, so the text objects stay in the file but are not painted.

    The document is modified in place; the same object is returned for
    convenience.

    Args:
        pdf: The opened document whose text should be hidden.

    Returns:
        The ``pdf`` object that was passed in, after patching.
    """
    for page_number in range(pdf.pageCount):
        content_xrefs = pdf[page_number]._getContents()
        for xref in content_xrefs:
            raw_stream = pdf._getXrefStream(xref)
            # NOTE: this is a plain byte substitution, so any occurrence of
            # the two bytes "Tm" in the stream is patched, not only those
            # that are actual operators.
            patched_stream = raw_stream.replace(b'Tm', b'Tm\n3 Tr')
            pdf._updateStream(xref, patched_stream)
    return pdf
@yohann84L
yohann84L / gridmask_imaug.py
Last active June 2, 2020 08:30
GridMask augmentation for imgaug
## GridMask augmentation for imgaug
##
## Code based on this kernel https://www.kaggle.com/shivyshiv/efficientnet-gridmask-training-pytorch
from imgaug.augmenters import meta
from imgaug import parameters as iap
import imgaug.augmenters as iaa
@yohann84L
yohann84L / autoaug_imaug.py
Created June 2, 2020 09:16
AutoAug for imgaug pipeline
import random
import numpy as np
from PIL import Image, ImageEnhance, ImageOps
class ImageNetPolicy(object):
def __init__(self, fillcolor=(128, 128, 128)):
self.policies = [
SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor),