Allan Jie allanj

## iob1toiob2_funct.py
"""
IOB1:  O I I B I
IOB2:  O B I B I
"""

from typing import List

def iob2(tags: List[str]):
    """
    Check that tags have a valid IOB format.

## fairseq_gen.py
import torch
from fairseq.models.bart import BARTModel

# Build the BART model from the local directory 'model_files/bart-large-model',
# using the 'checkpoint_best.pt' checkpoint file and the data path
# 'data/cloze_replace_all-bin'.
# NOTE(review): all three paths are relative, so this presumably has to be run
# from the project root -- confirm against how the script is launched.
bart = BARTModel.from_pretrained(
    'model_files/bart-large-model',
    checkpoint_file='checkpoint_best.pt',
    data_name_or_path='data/cloze_replace_all-bin'
)

# Move the loaded model onto the GPU.
# NOTE(review): there is no CPU fallback here; presumably this fails on a
# machine without a CUDA device -- confirm if CPU-only use is expected.
bart.cuda()

## BIOtoBIOES.py
def iob_iobes(tags):
    """
    IOB2 (BIO) -> IOBES
    """
    new_tags = []
    for i, tag in enumerate(tags):
        if tag == 'O':
            new_tags.append(tag)
        elif tag.split('-')[0] == 'B':
            if i + 1 != len(tags) and \

## command.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                allanj
                / command.md
            
            
              Created
              August 15, 2022 02:36
            
              
                Useful Commands in Linux
              
          
    Kill processes whose command line contains a certain string

For example, to kill every process whose command line contains python3 -u experiment_main.py:
kill $(ps aux | grep '[p]ython3 -u experiment_main.py' | awk '{print $2}')
Hadoop: list files sorted by date

hdfs dfs -ls / | sort -k6,7

  
## demo_sft_with_accelerate.py
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, PreTrainedTokenizerFast, set_seed, AutoModelForCausalLM, AutoConfig
from tqdm import tqdm
import argparse
import torch
import torch.nn as nn
import logging
from typing import Dict, Tuple
from accelerate import Accelerator, DistributedDataParallelKwargs
from accelerate.logging import get_logger

## bootstrap.py
"""
This is a simple example showing how to calculate the p-value when comparing
two models' accuracy.
Bootstrapping t-test
"""
import random

# Fix the seed so the dummy data generated below is the same on every run.
random.seed(42)
# Assume we have a test set of 1000 samples.
# We just create dummy results for the demo: one label drawn from
# 'A', 'B' or 'C' for each sample.
groundtruth = [random.choice(['A', 'B', 'C']) for _ in range(1000)]

## example.jsonl
{"name": "HumanEval_79_decimal_to_binary", "language": "py", "prompt": "def decimal_to_binary(decimal: int) -> str:\n    \"\"\"You will be given a number in decimal form and your task is to convert it to\n    binary format. The function should return a string, with each character representing a binary\n    number. Each character in the string will be '0' or '1'.\n\n    There will be an extra couple of characters 'db' at the beginning and at the end of the string.\n    The extra characters are there to help with the format.\n\n    Examples:\n    >>> decimal_to_binary(15)\n    'db1111db'\n    >>> decimal_to_binary(32)\n    'db100000db'\n    \"\"\"\n", "doctests": "transform", "original": "/home/arjun/repos/nuprl/MultiPL-E/datasets/../datasets/originals-with-cleaned-doctests/HumanEval_79_decimal_to_binary.py", "prompt_terminology": "reworded", "stop_tokens": ["\ndef", "\n#", "\nif", "\nclass"], "entry_point": "decimal_to_binary", "test": "def check(candidate):\n    assert candidate(0) == 'db0db'\n    assert cand
	"""
	IOB1: O I I B I
	IOB2: O B I B I
	"""

	from typing import List

	def iob2(tags: List[str]):
	"""
	Check that tags have a valid IOB format.
	import torch
	from fairseq.models.bart import BARTModel

	bart = BARTModel.from_pretrained(
	'model_files/bart-large-model',
	checkpoint_file='checkpoint_best.pt',
	data_name_or_path='data/cloze_replace_all-bin'
	)

	bart.cuda()
	def iob_iobes(tags):
	"""
	IOB2 (BIO) -> IOBES
	"""
	new_tags = []
	for i, tag in enumerate(tags):
	if tag == 'O':
	new_tags.append(tag)
	elif tag.split('-')[0] == 'B':
	if i + 1 != len(tags) and \
	from torch.utils.data import DataLoader
	from transformers import AutoTokenizer, PreTrainedTokenizerFast, set_seed, AutoModelForCausalLM, AutoConfig
	from tqdm import tqdm
	import argparse
	import torch
	import torch.nn as nn
	import logging
	from typing import Dict, Tuple
	from accelerate import Accelerator, DistributedDataParallelKwargs
	from accelerate.logging import get_logger
	"""
	This is a simple example to show how to calculate the p_value of two models' accuracy
	Bootstrapping t-test
	"""
	import random

	random.seed(42)
	# assume we have test set 1000 samples
	# we just create dummy results to demo
	groundtruth = [random.choice(['A', 'B', 'C']) for _ in range(1000)]