Manuel Romero mrm8488

## smallberta_pretraining.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mrm8488
                / smallberta_pretraining.ipynb
            
            
              Created
              February 25, 2020 20:44
                — forked from aditya-malte/smallberta_pretraining.ipynb
            
              
                smallBERTa_Pretraining.ipynb
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## hypercropz.py
from tkinter import *
from PIL import ImageTk,Image
import time
import os

# Desired output image dimensions (presumably in pixels; TODO confirm against the rest of the script).
targetImageWidth = 850
targetImageHeight = 400

# Dimensions of the input image, initialised to 0 here.
# NOTE(review): presumably overwritten once an image is loaded; confirm in the code that follows.
inputImageWidth = 0
inputImageHeight = 0

## iterable_dataset_v2.py
class CustomIterableDatasetv2(IterableDataset):
    """Iterable dataset described by two file paths.

    Only the paths are kept on the instance; the constructor neither opens
    the files nor stores any of their contents.
    """

    def __init__(self, filename_en, filename_gm):
        """Remember the two file paths.

        Args:
            filename_en: Path stored as ``self.filename_en``.
            filename_gm: Path stored as ``self.filename_gm``.
        """
        # Keep just the paths so that nothing is loaded into memory here.
        self.filename_en, self.filename_gm = filename_en, filename_gm

## iterable_dataloader_v2.py
# Build the iterable dataset from a file path and wrap it in a DataLoader
# that is asked for batches of 64.
dataset = CustomIterableDatasetv1('path_to/somefile')
dataloader = DataLoader(dataset, batch_size = 64)

# Each item produced by the dataloader is unpacked into a pair (X, y).
for X, y in dataloader:
    print(len(X)) # expected: 64 (the batch size)
    print(y.shape) # expected: (64,)

    ### Do something with X and y

    ###

## iterable_dataset_v1.py
class CustomIterableDatasetv1(IterableDataset):

    def __init__(self, filename):

        #Store the filename in object's memory
        self.filename = filename

        #And that's it, we no longer need to store the contents in the memory

    def preprocess(self, text):

## iterable_dataloader_v0.py
# Create the iterable dataset object from a file path
dataset     = CustomIterableDataset('path_to/somefile')
# Wrap it in a DataLoader that is asked for batches of 64
dataloader  = DataLoader(dataset, batch_size = 64)

for data in dataloader:
    # data is a list containing 64 (= batch_size) consecutive lines of the file
    print(len(data)) # expected: 64

    # The text and labels still need to be separated from each other, and the text preprocessed

## iterable_dataset_v0.py
from torch.utils.data import IterableDataset

class CustomIterableDataset(IterableDataset):
    """Iterable dataset described by a single file path.

    Only the path is kept on the instance; the constructor neither opens
    the file nor stores any of its contents.
    """

    def __init__(self, filename):
        """Remember the file path.

        Args:
            filename: Path stored as ``self.filename``.
        """
        # Keep just the path so that nothing is loaded into memory here.
        self.filename = filename

## ngrok_tensorboard_colab.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              2 stars
            
          
                mrm8488
                / ngrok_tensorboard_colab.md
            
            
              Last active
              September 19, 2022 09:35
            
              
                Setup ngrok and run TensorBoard on Colab
              
          
    Setup ngrok and run TensorBoard on Colab
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip
LOG_DIR = './log'
get_ipython().system_raw(

  
## app.js
// Connect to the local "test" MongoDB database through mongoose and log
// any connection error to the console.
var mongoose = require('mongoose');

mongoose.connect('mongodb://localhost/test');

var db = mongoose.connection;

// Register the bound console.error itself as the listener so that it is
// invoked with the error object. Wrapping it in a function that merely
// returns the bound function would never call it, and nothing would be logged.
db.on('error', console.error.bind(console, 'connection error: '));


## memory_profiling.sh
# Monitor changes in the memory usage of myscript and timestamp each reported line.
#   - The loop re-runs `ps auxw | grep` every 30 seconds for as long as grep finds a match.
#   - [m] in the grep expression prevents the grep process line itself from being matched.
#   - uniq passes on a line only when it differs from the previous one;
#     stdbuf -o0 turns off its output buffering.
#   - ts timestamps the lines.
while ps auxw | grep '[m]yscript'; do sleep 30; done | stdbuf -o0 uniq | ts
	from tkinter import *
	from PIL import ImageTk,Image
	import time
	import os

	targetImageWidth = 850
	targetImageHeight = 400

	inputImageWidth = 0
	inputImageHeight = 0
	class CustomIterableDatasetv2(IterableDataset):

	def __init__(self, filename_en, filename_gm):

	#Store the filenames in object's memory
	self.filename_en = filename_en
	self.filename_gm = filename_gm

	#And that's it, we no longer need to store the contents in the memory
	dataset = CustomIterableDatasetv1('path_to/somefile')
	dataloader = DataLoader(dataset, batch_size = 64)

	for X, y in dataloader:
	print(len(X)) # 64
	print(y.shape) # (64,)

	### Do something with X and y

	###
	class CustomIterableDatasetv1(IterableDataset):

	def __init__(self, filename):

	#Store the filename in object's memory
	self.filename = filename

	#And that's it, we no longer need to store the contents in the memory

	def preprocess(self, text):
	#Creating the iterable dataset object
	dataset = CustomIterableDataset('path_to/somefile')
	#Creating the dataloader
	dataloader = DataLoader(dataset, batch_size = 64)

	for data in dataloader:
	#Data is a list containing 64 (=batch_size) consecutive lines of the file
	print(len(data)) #[64,]

	#We still need to separate the text and labels from each other and preprocess the text
	from torch.utils.data import IterableDataset

	class CustomIterableDataset(IterableDataset):

	def __init__(self, filename):

	#Store the filename in object's memory
	self.filename = filename

	#And that's it, we no longer need to store the contents in the memory
	// Connect to the local "test" MongoDB database through mongoose and log
	// any connection error to the console.
	var mongoose = require('mongoose');

	mongoose.connect('mongodb://localhost/test');

	var db = mongoose.connection;

	// Register the bound console.error itself as the listener so that it is
	// invoked with the error object. Wrapping it in a function that merely
	// returns the bound function would never call it, and nothing would be logged.
	db.on('error', console.error.bind(console, 'connection error: '));
	while ps auxw \| grep '[m]yscript'; do sleep 30; done \| stdbuf -o0 uniq \| ts
	# Monitor changes in memory usage of myscript and timestamp the lines using ts. stdbuf -o0 turns off output buffering. [m] in the grep expression prevents the grep process line itself from being matched.