This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import xlrd | |
def createDirs(): | |
# Directory | |
directories = ["./codedata","./codedata/StudyTitle","./codedata/PressReleaseTitle"] | |
for directory in directories: | |
try: | |
# Create the directory |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TODO(adarob): Add support for 2.x. | |
# %tensorflow_version 1.x | |
import datetime | |
import functools | |
import json | |
import os | |
import pprint | |
import random | |
import string |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import numpy as np | |
# Collect every entry two path components below ./codedata.
# glob returns matches in arbitrary, filesystem-dependent order, so sort them
# first; otherwise the seeded shuffle below would not yield the same split on
# every machine. (The pattern contains no '**', so recursive=True had no
# effect and is omitted.)
files = sorted(glob.glob('./codedata/*/*'))
# Split files into test/train set
np.random.seed(1000)  # For reproducibility
np.random.shuffle(files)
N = int(len(files) * 0.8)  # Do an 80-20 split for training/validation
data = dict( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from tqdm import tqdm_notebook as tqdm | |
for split in data: | |
with tf.io.gfile.GFile(os.path.join(DATA_DIR, split+'.txt'), 'w') as g: | |
for fn in tqdm(data[split]): | |
with open(fn, errors='ignore') as f: | |
text = f.read() | |
text = text.replace('\n', ' ').replace('\t', ' ') | |
ans = re.sub(' +', ' ', text) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ds_func(split, shuffle_files=False):
    """Return a line-oriented text dataset for the requested split.

    Args:
        split: Name of the split; lines are read from
            ``<DATA_DIR>/<split>.txt``.
        shuffle_files: Accepted for interface compatibility and ignored.

    Returns:
        A ``tf.data.Dataset`` whose elements are dicts of the form
        ``{"text": <line>}``.
    """
    del shuffle_files  # Unused.
    # Read the file that belongs to the requested split so that asking for
    # e.g. "valid" does not silently return the training data.
    ds = tf.data.TextLineDataset(os.path.join(DATA_DIR, split + '.txt'))
    ds = ds.map(lambda ex: dict(text=ex))
    return ds
# Print the first five examples that ds_func yields for the "valid" split.
_preview = ds_func("valid").take(5)
for ex in tfds.as_numpy(_preview):
    print(ex)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
t5.data.TaskRegistry.remove('narcissus') | |
t5.data.TaskRegistry.add( | |
"narcissus", | |
# Supply a function which returns a tf.data.Dataset. | |
dataset_fn=ds_func, | |
splits=["train", "valid"], | |
# Supply a function which preprocesses text from the tf.data.Dataset. | |
text_preprocessor=[, | |
lambda sample: t5.data.preprocessors.prefix_lm(sample, label='article: ') | |
], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the registered "narcissus" task, build its "valid" split with
# 512-token limits for inputs and targets, and print the first five examples.
nq_task = t5.data.TaskRegistry.get("narcissus")
_seq_len = {"inputs": 512, "targets": 512}
ds = nq_task.get_dataset(split="valid", sequence_length=_seq_len)
for ex in tfds.as_numpy(ds.take(5)):
    print(ex)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def transact(self): | |
""" | |
Specifies the Monte-Carlo transaction behavior (i.e., deposits, trades, etc...) | |
in terms of their respective probability distributions. | |
""" | |
latest_action = 'None' | |
for _ in range(int(round(float(mean_events_per_hour),0))): | |
self.latest_amt = None | |
self.latest_tkn_name = None | |
self.user_name = global_username |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class MonteCarloGenerator(object): | |
""" | |
Generates Monte Carlo scenarios. | |
""" | |
def __init__( | |
self, | |
target_tvl: Decimal, | |
whitelisted_tokens: dict, | |
price_feed: pd.DataFrame, |