(Bill) Yuchen Lin yuchenlin

## Yi-6B-Chat.0-100.json
[
  {
    "session_id": "7f4abf64593c439f8b085585aeeda566",
    "model_input": "<|im_start|>user\nGive me an introduction over 200 words for ShangHai BMG Chemical Co., Ltd, a chemical company in Room 602, no 291 sikai road shanghai Shanghai,China<|im_end|>\n<|im_start|>assistant\n",
    "output": [
      "ShangHai BMG Chemical Co., Ltd. is a leading chemical company located in Room 602, No. 291 Sikai Road, Shanghai, China. Established in 2010, the company has since been dedicated to providing high-quality chemical products and services to its customers. With a strong R&D team and advanced production facilities, ShangHai BMG Chemical Co., Ltd. has become a trusted partner in the chemical industry, serving a wide range of clients across various sectors. The company's commitment to innovation, quality, and customer satisfaction has enabled it to maintain a competitive edge in the market.",
      "ShangHai BMG Chemical Co., Ltd. is a leading chemical company located in Room 602, No. 291 Sikai Road, Shanghai, Chin

## chat_app.py
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import StoppingCriteria, StoppingCriteriaList, LogitsProcessor, LogitsProcessorList

model_path = "./qlora-out-hkg_300B/merged/"

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True)


## gpt-multiplication.py
"""
Decompose and memorize by program, only asking LLMs to do low-level computations.
"""

import openai
import re
import math

from tenacity import (
    retry,

## 561-hw3-grading.py
import sys

acc = float(sys.argv[1]) # mnist acc
ta_acc = float(sys.argv[2]) # ta acc

t1=50.00
t2=90.00

tt1=30.0
tt2=60.0

## grade.sh
#!/bin/bash

echo "HW3 Report" > $vocareumReportFile
echo "Programming language..." >> $vocareumReportFile

filename=$(ls|grep NeuralNetwork)
if [[ $filename =~ (^|[[:space:]])"NeuralNetwork.py"($|[[:space:]]) ]]; then
	cmd="python NeuralNetwork.py train_image.csv train_label.csv test_image.csv"
    mnist_cmd="python NeuralNetwork.py grading_train_image.csv grading_train_label.csv grading_test_image.csv"
    ta_cmd="python NeuralNetwork.py grading_train_image.csv grading_train_label.csv additional_test_image.csv"

## stanza_parse.py
"""
# stanza.download()
# http://nlp.stanford.edu/software/stanza/1.0.0/en/default.zip

Example usage:
CUDA_VISIBLE_DEVICES=1 \
python parsing.py \
    --input_corpus_path ./corpora/gkb_best_sent.txt \
    --output_json_path ./parses/gkb_best.parses.jsonl \
    --prefix gkb_best --num_shards 10000 --shard_id 0

## encoding.py
from argparse import Namespace

from numpy.core.defchararray import index
from semanticdebugger.debug_algs.cl_simple_alg import ContinualFinetuning
from tqdm import tqdm
import torch
from semanticdebugger.models.utils import trim_batch
import json
from semanticdebugger.debug_algs import run_lifelong_finetune

## mv.sh
#!/bin/bash
# Usage: bash mv.sh /path/to/src/ /path/to/target/

sourcedir=$1
targetdir=$2

filecount=$(find $sourcedir | wc -l)
echo $filecount # Print the number of the total files.
mkdir $targetdir
mv -v $sourcedir $targetdir | pv -l -s $filecount > /tmp/mv_log.txt

## bert_kmeans.py
from sentence_transformers import SentenceTransformer   # pip install -U sentence-transformers
from sklearn.cluster import KMeans
from collections import defaultdict

INPUT_FILE = "/tmp/test_input.txt"

with open(INPUT_FILE, "r") as f:
    lines = f.read().splitlines()

print(len(lines))

## batched_roberta_infer.py
import torch
import numpy as np
from tqdm import tqdm
from fairseq.models.roberta import RobertaModel
from fairseq.data.data_utils import collate_tokens
from torch.utils.data import DataLoader, SequentialSampler

roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
roberta.eval()
roberta.cuda()
	[
	{
	"session_id": "7f4abf64593c439f8b085585aeeda566",
	"model_input": "<\|im_start\|>user\nGive me an introduction over 200 words for ShangHai BMG Chemical Co., Ltd, a chemical company in Room 602, no 291 sikai road shanghai Shanghai,China<\|im_end\|>\n<\|im_start\|>assistant\n",
	"output": [
	"ShangHai BMG Chemical Co., Ltd. is a leading chemical company located in Room 602, No. 291 Sikai Road, Shanghai, China. Established in 2010, the company has since been dedicated to providing high-quality chemical products and services to its customers. With a strong R&D team and advanced production facilities, ShangHai BMG Chemical Co., Ltd. has become a trusted partner in the chemical industry, serving a wide range of clients across various sectors. The company's commitment to innovation, quality, and customer satisfaction has enabled it to maintain a competitive edge in the market.",
	"ShangHai BMG Chemical Co., Ltd. is a leading chemical company located in Room 602, No. 291 Sikai Road, Shanghai, Chin
	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from transformers import StoppingCriteria, StoppingCriteriaList, LogitsProcessor, LogitsProcessorList

	model_path = "./qlora-out-hkg_300B/merged/"

	tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
	model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True)
	"""
	Decompose and memorize by program, only asking LLMs to do low-level computations.
	"""

	import openai
	import re
	import math

	from tenacity import (
	retry,
	import sys

	acc = float(sys.argv[1]) # mnist acc
	ta_acc = float(sys.argv[2]) # ta acc

	t1=50.00
	t2=90.00

	tt1=30.0
	tt2=60.0
	#!/bin/bash

	echo "HW3 Report" > $vocareumReportFile
	echo "Programming language..." >> $vocareumReportFile

	filename=$(ls\|grep NeuralNetwork)
	if [[ $filename =~ (^\|[[:space:]])"NeuralNetwork.py"($\|[[:space:]]) ]]; then
	cmd="python NeuralNetwork.py train_image.csv train_label.csv test_image.csv"
	mnist_cmd="python NeuralNetwork.py grading_train_image.csv grading_train_label.csv grading_test_image.csv"
	ta_cmd="python NeuralNetwork.py grading_train_image.csv grading_train_label.csv additional_test_image.csv"
	"""
	# stanza.download()
	# http://nlp.stanford.edu/software/stanza/1.0.0/en/default.zip

	Example usage:
	CUDA_VISIBLE_DEVICES=1 \
	python parsing.py \
	--input_corpus_path ./corpora/gkb_best_sent.txt \
	--output_json_path ./parses/gkb_best.parses.jsonl \
	--prefix gkb_best --num_shards 10000 --shard_id 0
	from argparse import Namespace

	from numpy.core.defchararray import index
	from semanticdebugger.debug_algs.cl_simple_alg import ContinualFinetuning
	from tqdm import tqdm
	import torch
	from semanticdebugger.models.utils import trim_batch
	import json
	from semanticdebugger.debug_algs import run_lifelong_finetune
	#!/bin/bash
	# Usage: bash mv.sh /path/to/src/ /path/to/target/

	sourcedir=$1
	targetdir=$2

	filecount=$(find $sourcedir \| wc -l)
	echo $filecount # Print the number of the total files.
	mkdir $targetdir
	mv -v $sourcedir $targetdir \| pv -l -s $filecount > /tmp/mv_log.txt
	from sentence_transformers import SentenceTransformer # pip install -U sentence-transformers
	from sklearn.cluster import KMeans
	from collections import defaultdict

	INPUT_FILE = "/tmp/test_input.txt"

	with open(INPUT_FILE, "r") as f:
	lines = f.read().splitlines()

	print(len(lines))
	import torch
	import numpy as np
	from tqdm import tqdm
	from fairseq.models.roberta import RobertaModel
	from fairseq.data.data_utils import collate_tokens
	from torch.utils.data import DataLoader, SequentialSampler

	roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
	roberta.eval()
	roberta.cuda()