Skip to content

Instantly share code, notes, and snippets.

View mrm8488's full-sized avatar
🏠
Working from home

Manuel Romero mrm8488

🏠
Working from home
View GitHub Profile
# Create the iterable dataset object.
dataset = CustomIterableDataset('path_to/somefile')

# Wrap the dataset in a DataLoader that groups items into batches of 64.
dataloader = DataLoader(dataset, batch_size=64)

for data in dataloader:
    # Each batch is expected to hold 64 (= batch_size) consecutive lines of
    # the file (NOTE(review): depends on how CustomIterableDataset iterates,
    # which is not visible here).
    print(len(data))  # expected: 64
    # The text and labels still need to be separated from each other, and the
    # text still needs to be preprocessed.
# Iterable-style dataset that keeps only the path of its backing file.
# NOTE(review): this excerpt has lost its indentation and stops at the
# `preprocess` header below; restore both from the original source before use.
class CustomIterableDatasetv1(IterableDataset):
def __init__(self, filename):
# Remember only the path of the file on the instance.
self.filename = filename
# Nothing else is stored here: the constructor does not load the file's contents into memory.
# NOTE(review): the body of `preprocess` is missing from this excerpt.
def preprocess(self, text):
# Build the dataset from the file path and batch it 64 items at a time.
dataset = CustomIterableDatasetv1('path_to/somefile')
dataloader = DataLoader(dataset, batch_size=64)

# Each batch is unpacked into inputs X and labels y.
# NOTE(review): the shapes in the trailing comments are the author's expected
# values; they depend on what CustomIterableDatasetv1 yields.
for X, y in dataloader:
    print(len(X))  # 64
    print(y.shape)  # (64,)
    ### Do something with X and y
    ###
class CustomIterableDatasetv2(IterableDataset):
    """Iterable-style dataset that remembers the paths of two files.

    Only the paths are kept on the instance; the constructor does not open
    or read either file, so no file contents are held in memory.
    """

    def __init__(self, filename_en, filename_gm):
        """Store both filenames on the instance.

        Args:
            filename_en: Path of the first file (presumably the English
                side of a parallel corpus, going by the name -- confirm).
            filename_gm: Path of the second file (presumably the German
                side -- confirm).
        """
        self.filename_en = filename_en
        self.filename_gm = filename_gm
from tkinter import *
from PIL import ImageTk,Image
import time
import os
# Target size for the displayed image (presumably in pixels -- confirm against usage).
targetImageWidth, targetImageHeight = 850, 400
# Input image size starts at zero (presumably filled in once an image is loaded -- confirm).
inputImageWidth = inputImageHeight = 0
@mrm8488
mrm8488 / smallberta_pretraining.ipynb
Created February 25, 2020 20:44 — forked from aditya-malte/smallberta_pretraining.ipynb
smallBERTa_Pretraining.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@mrm8488
mrm8488 / smallberta_pretraining.ipynb
Created February 25, 2020 20:44 — forked from aditya-malte/smallberta_pretraining.ipynb
smallBERTa_Pretraining.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
"################################################################################\n",
"# INSTALL CONDA ON GOOGLE COLAB\n",
"################################################################################\n",
"! wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh\n",
"! chmod +x Miniconda3-py37_4.8.2-Linux-x86_64.sh\n",
"! bash ./Miniconda3-py37_4.8.2-Linux-x86_64.sh -b -f -p /usr/local\n",
"import sys\n",
"sys.path.append('/usr/local/lib/python3.7/site-packages/')"
@mrm8488
mrm8488 / text_dataset_pytorch.py
Created April 17, 2020 03:17
Create an efficient text dataset
class LazyTextDataset(Dataset):
    """Map-style dataset that serves CSV rows from a text file on demand.

    The file is not parsed up front: the constructor only counts its lines,
    and each row is fetched through ``linecache`` and parsed when indexed.
    Every physical line is treated as one CSV record, so quoted fields that
    span several lines are not supported.
    """

    def __init__(self, filename):
        """Remember the file path and count how many lines the file holds.

        Args:
            filename: Path of the text file to read rows from.

        Raises:
            OSError: If the file cannot be opened for reading.
        """
        self._filename = filename
        # Count lines in-process instead of shelling out to ``wc -l``: this
        # avoids passing the filename through a shell (injection, paths with
        # spaces), works where ``wc`` is unavailable, and also counts a final
        # line that has no trailing newline.
        with open(filename, "rb") as handle:
            self._total_data = sum(1 for _ in handle)

    def __len__(self):
        """Return the number of lines counted when the dataset was created."""
        return self._total_data

    def __getitem__(self, idx):
        """Return the parsed CSV fields of the line at zero-based ``idx``.

        Args:
            idx: Zero-based row index.

        Returns:
            The list of string fields parsed from that line.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``. Raising
                here (instead of returning an empty row) lets plain
                iteration over the dataset terminate.
        """
        if not 0 <= idx < self._total_data:
            raise IndexError(
                f"index {idx} out of range for dataset of size {self._total_data}"
            )
        # linecache line numbers are one-based.
        line = linecache.getline(self._filename, idx + 1)
        return next(csv.reader([line]))
@mrm8488
mrm8488 / compare_js_objects.js
Created April 30, 2020 01:45
Vanilla COMPARE JS OBJECTS
const compareObjects = (a, b) => {
if (a === b) return true;
if (typeof a != 'object' || typeof b != 'object' || a == null || b == null) return false;
let keysA = Object.keys(a), keysB = Object.keys(b);
if (keysA.length != keysB.length) return false;
for (let key of keysA) {