Nicholas Broad nbroad1881

## keybase.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                nbroad1881
                / keybase.md
            
            
              Created
              November 15, 2019 15:17
            
          
    Keybase proof

I hereby claim:

I am nbroad1881 on github.
I am nicholasbroad (https://keybase.io/nicholasbroad) on keybase.
I have a public key ASB10K5suwte9WvhBvNox4bXW95vszH1jaJXZ54ejZAeUAo

To claim this, I am signing this object:

  
## corpus_split.py
!split -l 250000 text_file.txt smaller_
### usage: split [options] filename prefix
### -l N : put N lines into each output file
### -b N : put N bytes into each output file

import glob

file_list = glob.glob("smaller_*")

## pynb-magic.py
# help for a function
%timeit?

# run code block multiple times to get average time
%%timeit
L = [n ** 2 for n in range(1000)]

# paste multi-line code to cell
%paste
>>> def donothing(x):

## jupyter_dockerfile
FROM ubuntu:18.04

# Set character encoding environment variables
ENV LC_ALL=C.UTF-8 LANG=C.UTF-8

# Allow apt-get install without interaction from console
ENV DEBIAN_FRONTEND=noninteractive

# Set the working dir to the root user home folder
WORKDIR /root

## jupyter_dockerfile
# **************************************************
# Commands to run this dockerfile
# $docker build -t name_of_image directory
#
# $docker run -v ~/path/to/local/dir:/root/work -it --name my_container -p 8888:8888 --rm name_of_image

# (-v stands for volumes. This mounts a local dir to a dir in the container)
# -v ~/path/to/local/dir:/root/work -it  \
# (-it stands for interactive. Any changes to the local dir will then be seen in the connected dir in the container.)
# --name my_container \

## os_and_pathlib.py
import os
from pathlib import Path

# Absolute path of the directory that contains this file (a str):
# abspath() makes the file path absolute, dirname() drops the file name.
absolute_path = os.path.dirname(os.path.abspath(__file__))
# OR, with pathlib: absolute path of the file itself (a Path object, with
# symlinks resolved). NOTE: unlike the os.path version above this still
# includes the file name; append `.parent` to get the containing directory.
absolute_path = Path(__file__).resolve()

# List contents of directory (returns a list of entry names)
os.listdir('dirname-or-blank-for-current-dir')

## dpr_gpu_embeddings.py
# see here https://huggingface.co/docs/datasets/faiss_and_ea.html#adding-a-faiss-index

# I loaded my dataset from a Pandas dataframe
import pandas as pd
df = pd.read_csv("dataset.csv")


from transformers import DPRContextEncoder, DPRContextEncoderTokenizerFast
import torch
torch.set_grad_enabled(False)

## us_state_to_region.py
# full name to abbreviation here: https://gist.github.com/mshafrir/2646763

ste_to_reg = {
        'AA': 'Other',
        'AE': 'Other',
        'AP': 'Other',
        'AK': 'West',
        'AL': 'South',
        'AR': 'South',
        'AS': 'Other',

## multisample_dropout.py
from torch import nn
from transformers import AutoModel

class Model(nn.Module):
    """Pretrained transformer backbone bundled with a dropout layer."""

    def __init__(self, config):
        """Build the backbone and the dropout module.

        Args:
            config: object exposing ``hidden_dropout_prob``, which is used as
                the dropout probability.
        """
        super().__init__()

        # The checkpoint argument is left as a literal ``...`` placeholder in
        # this snippet; replace it with a real model name or path before use.
        backbone = AutoModel.from_pretrained(...)
        self.model = backbone

        drop_prob = config.hidden_dropout_prob
        self.dropout = nn.Dropout(p=drop_prob)

## reinit_layers.py
def reinit_model_weights(model, n_layers, config):
    """Re-initialize encoder layers and the output module of ``model``.

    Args:
        model: module exposing ``backbone`` (with ``backbone.encoder.layer``)
            and ``output``.
        n_layers: forwarded unchanged to ``reinit_layers``; presumably the
            number of encoder layers to reset -- confirm against that helper.
        config: model configuration; its ``initializer_range`` is used as the
            standard deviation passed to the re-initialization helpers.
    """
    # The original body referenced ``std`` without ever defining it, so every
    # call raised NameError. NOTE(review): ``initializer_range`` is assumed to
    # be the intended source (it is the init std on BERT-style Hugging Face
    # configs) -- confirm for the model family in use.
    std = config.initializer_range

    # use whatever you named your transformer module
    backbone = model.backbone

    encoder_layers = backbone.encoder.layer
    reinit_layers(encoder_layers, n_layers, std)

    # use whatever you named the output
    reinit_modules([model.output], std)
	!split -l 250000 text_file.txt smaller_
	### split [options] filename prefix
	### -l linenumber
	### -b bytes

	import glob

	file_list = glob.glob("smaller_*")
	# help for a function
	%timeit?

	# run code block multiple times to get average time
	%%timeit
	L = [n ** 2 for n in range(1000)]

	# paste multi-line code to cell
	%paste
	>>> def donothing(x):
	FROM ubuntu:18.04

	# Set character encoding environment variables
	ENV LC_ALL=C.UTF-8 LANG=C.UTF-8

	# Allow apt-get install without interaction from console
	ENV DEBIAN_FRONTEND=noninteractive

	# Set the working dir to the root user home folder
	WORKDIR /root
	# **************************************************
	# Commands to run this dockerfile
	# $docker build -t name_of_image directory
	#
	# $docker run -v ~/path/to/local/dir:/root/work -it --name my_container -p 8888:8888 --rm name_of_image

	# (-v stands for volumes. This mounts a local dir to a dir in the container)
	# -v ~/path/to/local/dir:/root/work -it \
	# (-it stands for interactive. Any changes to local dir will then be seen in the connected dir in the container
	# --name my_container \
	import os
	from pathlib import Path

	# Absolute path of file
	absolute_path = os.path.dirname(os.path.abspath(__file__))
	# OR
	absolute_path = Path(__file__).resolve()

	# List contents of directory
	os.listdir('dirname-or-blank-for-current-dir')
	# see here https://huggingface.co/docs/datasets/faiss_and_ea.html#adding-a-faiss-index

	# I loaded my dataset from a Pandas dataframe
	import pandas as pd
	df = pd.read_csv("dataset.csv")


	from transformers import DPRContextEncoder, DPRContextEncoderTokenizerFast
	import torch
	torch.set_grad_enabled(False)
	# full name to abbreviation here: https://gist.github.com/mshafrir/2646763

	ste_to_reg = {
	'AA': 'Other',
	'AE': 'Other',
	'AP': 'Other',
	'AK': 'West',
	'AL': 'South',
	'AR': 'South',
	'AS': 'Other',
	from torch import nn
	from transformers import AutoModel

	class Model(nn.Module):

	def __init__(self, config):
	super().__init__()

	self.model = AutoModel.from_pretrained(...)
	self.dropout = nn.Dropout(config.hidden_dropout_prob)
	def reinit_model_weights(model, n_layers, config):

	# use whatever you named your transformer module
	backbone = model.backbone

	encoder_layers = backbone.encoder.layer
	reinit_layers(encoder_layers, n_layers, std)

	# use whatever you named the output
	reinit_modules([model.output], std)