Skip to content

Instantly share code, notes, and snippets.

Mike Casale mikewcasale

Block or report user

Report or block mikewcasale

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
@mikewcasale
mikewcasale / narcissus_pr.py
Created Mar 26, 2020
narcissus print check
View narcissus_pr.py
# Print check for the "narcissus" task: look the task up in the registry,
# build its "valid" split with a sequence_length of 512 for both "inputs"
# and "targets", and print the first five examples as NumPy values.
nq_task = t5.data.TaskRegistry.get("narcissus")
sequence_length = {"inputs": 512, "targets": 512}
ds = nq_task.get_dataset(split="valid", sequence_length=sequence_length)
for ex in tfds.as_numpy(ds.take(5)):
    print(ex)
View narcissus.py
t5.data.TaskRegistry.remove('narcissus')
t5.data.TaskRegistry.add(
"narcissus",
# Supply a function which returns a tf.data.Dataset.
dataset_fn=ds_func,
splits=["train", "valid"],
# Supply a function which preprocesses text from the tf.data.Dataset.
text_preprocessor=[,
lambda sample: t5.data.preprocessors.prefix_lm(sample, label='article: ')
],
View ds_func.py
def ds_func(split, shuffle_files=False):
    """Return a line-by-line text dataset for the requested split.

    Args:
      split: Name of the split to load. The file ``<split>.txt`` inside
        ``DATA_DIR`` is read.
      shuffle_files: Unused; it is deleted immediately.

    Returns:
      A ``tf.data.Dataset`` whose elements are dicts with a single key,
      ``"text"``, holding one line of the split's file.
    """
    del shuffle_files  # Only one file is read per split.
    # Pick the file from `split`. Hard-coding 'train.txt' here would make
    # every split (including "valid") return the training data.
    ds = tf.data.TextLineDataset(os.path.join(DATA_DIR, split + '.txt'))
    ds = ds.map(lambda ex: dict(text=ex))
    return ds
# Sanity check: print the first five examples produced for the "valid" split.
preview = ds_func("valid").take(5)
for ex in tfds.as_numpy(preview):
    print(ex)
@mikewcasale
mikewcasale / cleanup.py
Created Mar 26, 2020
cleanup NLP input
View cleanup.py
import re
from tqdm import tqdm_notebook as tqdm
# For every split in `data`, open <DATA_DIR>/<split>.txt for writing and
# process each of that split's source files.
for split in data:
with tf.io.gfile.GFile(os.path.join(DATA_DIR, split+'.txt'), 'w') as g:
# Each `fn` is passed to open(), so data[split] is iterated as file paths;
# the iteration is wrapped in tqdm.
for fn in tqdm(data[split]):
# errors='ignore' skips undecodable bytes instead of raising.
with open(fn, errors='ignore') as f:
text = f.read()
# Flatten the text: newlines and tabs become spaces ...
text = text.replace('\n', ' ').replace('\t', ' ')
# ... and runs of spaces collapse into a single space.
ans = re.sub(' +', ' ', text)
# NOTE(review): nothing shown here writes `ans` to `g`; this snippet looks
# cut off after this line -- confirm against the full file.
View globit.py
import glob
import numpy as np
# Collect every path matching ./codedata/<subdir>/<name>.
# glob returns matches in arbitrary, filesystem-dependent order, so sort
# first; otherwise the fixed seed below does not yield a reproducible split.
files = sorted(glob.glob('./codedata/*/*', recursive=True))
# Split files into train/validation sets
np.random.seed(1000)  # For reproducibility
np.random.shuffle(files)  # In-place shuffle of the sorted list
N = int(len(files) * 0.8)  # Do an 80-20 split for training/validation
data = dict(
View t5_colab_pt1.py
# TODO(adarob): Add support for 2.x.
# %tensorflow_version 1.x
import datetime
import functools
import json
import os
import pprint
import random
import string
@mikewcasale
mikewcasale / xls_to_csv_etractor.py
Last active Mar 26, 2020
Extracts data from an XLS file and creates a new CSV file
View xls_to_csv_etractor.py
import os
import pandas as pd
import xlrd
def createDirs():
# Directory
directories = ["./codedata","./codedata/StudyTitle","./codedata/PressReleaseTitle"]
for directory in directories:
try:
# Create the directory
You can’t perform that action at this time.