Allan Jie allanj

## example.jsonl
{"name": "HumanEval_79_decimal_to_binary", "language": "py", "prompt": "def decimal_to_binary(decimal: int) -> str:\n    \"\"\"You will be given a number in decimal form and your task is to convert it to\n    binary format. The function should return a string, with each character representing a binary\n    number. Each character in the string will be '0' or '1'.\n\n    There will be an extra couple of characters 'db' at the beginning and at the end of the string.\n    The extra characters are there to help with the format.\n\n    Examples:\n    >>> decimal_to_binary(15)\n    'db1111db'\n    >>> decimal_to_binary(32)\n    'db100000db'\n    \"\"\"\n", "doctests": "transform", "original": "/home/arjun/repos/nuprl/MultiPL-E/datasets/../datasets/originals-with-cleaned-doctests/HumanEval_79_decimal_to_binary.py", "prompt_terminology": "reworded", "stop_tokens": ["\ndef", "\n#", "\nif", "\nclass"], "entry_point": "decimal_to_binary", "test": "def check(candidate):\n    assert candidate(0) == 'db0db'\n    assert cand

## bootstrap.py
"""
This is a simple example showing how to calculate the p-value for the difference in accuracy between two models
Bootstrapping t-test
"""
import random

random.seed(42)
# Pretend the test set holds 1000 samples; since this is only a demo,
# the gold labels are dummy values drawn at random from three classes.
groundtruth = [
    random.choice(['A', 'B', 'C'])
    for _ in range(1000)
]

## demo_sft_with_accelerate.py
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, PreTrainedTokenizerFast, set_seed, AutoModelForCausalLM, AutoConfig
from tqdm import tqdm
import argparse
import torch
import torch.nn as nn
import logging
from typing import Dict, Tuple
from accelerate import Accelerator, DistributedDataParallelKwargs
from accelerate.logging import get_logger

## command.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                allanj
                / command.md
            
            
              Created
              August 15, 2022 02:36
            
              
                Useful Commands in Linux
              
          
    Kill processes containing a certain string

For example, to kill every process whose command line contains python3 -u experiment_main.py:
kill $(ps aux | grep '[p]ython3 -u experiment_main.py' | awk '{print $2}')
Hadoop: list files sorted by date

hdfs dfs -ls / | sort -k6,7

  
## BIOtoBIOES.py
def iob_iobes(tags):
    """
    IOB2 (BIO) -> IOBES
    """
    new_tags = []
    for i, tag in enumerate(tags):
        if tag == 'O':
            new_tags.append(tag)
        elif tag.split('-')[0] == 'B':
            if i + 1 != len(tags) and \

## fairseq_gen.py
import torch
from fairseq.models.bart import BARTModel

# Load a BART model from the local directory 'model_files/bart-large-model',
# selecting the checkpoint file 'checkpoint_best.pt'.
# NOTE(review): the '-bin' suffix suggests data_name_or_path points at a
# fairseq-preprocessed (binarized) data directory — confirm.
bart = BARTModel.from_pretrained(
    'model_files/bart-large-model',
    checkpoint_file='checkpoint_best.pt',
    data_name_or_path='data/cloze_replace_all-bin'
)

# Move the loaded model onto the GPU; presumably requires a CUDA device.
bart.cuda()

## iob1toiob2_funct.py
"""
IOB1:  O I I B I
IOB2:  O B I B I
"""

from typing import List

def iob2(tags: List[str]):
    """
    Check that tags have a valid IOB format.

## Random Images on Refresh
<!DOCTYPE html>
<head>

<!--Little CSS fade in -->
<style>
.fade-in{
  -webkit-animation: fade-in 2s ease;
  -moz-animation: fade-in ease-in-out 2s both;
  -ms-animation: fade-in ease-in-out 2s both;
  -o-animation: fade-in ease-in-out 2s both;

## Install
pip install streamlit
pip install spacy
python -m spacy download en_core_web_sm
python -m spacy download en_core_web_md
python -m spacy download de_core_news_sm

## coref_bert.jsonnet
local bert_model = "bert-base-uncased";
local train_path = "./datasets/coref/train.english.v4_gold_conll";
local dev_path = "./datasets/coref/dev.english.v4_gold_conll";
local test_path = "./datasets/coref/test.english.v4_gold_conll";

{
  "dataset_reader": {
    "type": "coref",
    "token_indexers": {
      "bert": {
	"""
	This is a simple example to show how to calculate the p_value of two models' accuracy
	Bootstrapping t-test
	"""
	import random

	random.seed(42)
	# assume we have test set 1000 samples
	# we just create dummy results to demo
	groundtruth = [random.choice(['A', 'B', 'C']) for _ in range(1000)]
	from torch.utils.data import DataLoader
	from transformers import AutoTokenizer, PreTrainedTokenizerFast, set_seed, AutoModelForCausalLM, AutoConfig
	from tqdm import tqdm
	import argparse
	import torch
	import torch.nn as nn
	import logging
	from typing import Dict, Tuple
	from accelerate import Accelerator, DistributedDataParallelKwargs
	from accelerate.logging import get_logger
	def iob_iobes(tags):
	"""
	IOB2 (BIO) -> IOBES
	"""
	new_tags = []
	for i, tag in enumerate(tags):
	if tag == 'O':
	new_tags.append(tag)
	elif tag.split('-')[0] == 'B':
	if i + 1 != len(tags) and \
	import torch
	from fairseq.models.bart import BARTModel

	bart = BARTModel.from_pretrained(
	'model_files/bart-large-model',
	checkpoint_file='checkpoint_best.pt',
	data_name_or_path='data/cloze_replace_all-bin'
	)

	bart.cuda()
	"""
	IOB1: O I I B I
	IOB2: O B I B I
	"""

	from typing import List

	def iob2(tags: List[str]):
	"""
	Check that tags have a valid IOB format.
	<!DOCTYPE html>
	<head>

	<!--Little CSS fade in -->
	<style>
	.fade-in{
	-webkit-animation: fade-in 2s ease;
	-moz-animation: fade-in ease-in-out 2s both;
	-ms-animation: fade-in ease-in-out 2s both;
	-o-animation: fade-in ease-in-out 2s both;
	pip install streamlit
	pip install spacy
	python -m spacy download en_core_web_sm
	python -m spacy download en_core_web_md
	python -m spacy download de_core_news_sm
	local bert_model = "bert-base-uncased";
	local train_path = "./datasets/coref/train.english.v4_gold_conll";
	local dev_path = "./datasets/coref/dev.english.v4_gold_conll";
	local test_path = "./datasets/coref/test.english.v4_gold_conll";

	{
	"dataset_reader": {
	"type": "coref",
	"token_indexers": {
	"bert": {