Wannaphong Phatthiyaphaibun wannaphong

## sft_trainer.py
# This is a modified version of TRL's `SFTTrainer` example (https://github.com/huggingface/trl/blob/main/examples/scripts/sft_trainer.py),
# adapted to run with DeepSpeed ZeRO-3 and Mistral-7B-V1.0. The settings below were run on 1 node of 8 x A100 (80GB) GPUs.
#
# Usage:
#   - Install the latest transformers & accelerate versions: `pip install -U transformers accelerate`
#   - Install deepspeed: `pip install deepspeed==0.9.5`
#   - Install TRL from main: pip install git+https://github.com/huggingface/trl.git
#   - Clone the repo: git clone https://github.com/huggingface/trl.git
#   - Copy this Gist into trl/examples/scripts
#   - Run from root of trl repo with: accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero3.yaml --gradient_accumulation_steps 8 examples/scripts/sft_trainer.py

## build.yml
name: Minimal Android CI Workflow

on:
  push:
    branches:
      - master
    tags:
      - 'v*'

jobs:

## colab_download.py
files.download('example.txt')  # from colab to browser download

## thai_datetime.py
import datetime, pytz
tz = pytz.timezone('Asia/Bangkok')

def now():
    """Return the current time in the module-level ``tz`` as a Thai date string.

    The result has the form ``"<day> <Thai month name> <year + 543> <HH:MM:SS>"``,
    for example ``30 ตุลาคม 2560 20:45:30``.
    """
    current = datetime.datetime.now(tz)
    # The leading 'x' is a placeholder so that index 1 maps to January.
    thai_months = 'x มกราคม กุมภาพันธ์ มีนาคม เมษายน พฤษภาคม มิถุนายน กรกฎาคม สิงหาคม กันยายน ตุลาคม พฤศจิกายน ธันวาคม'.split()
    clock = current.strftime('%H:%M:%S')
    buddhist_year = current.year + 543  # Gregorian year -> Buddhist Era year
    return "%d %s %d %s" % (current.day, thai_months[current.month], buddhist_year, clock)

## collation.py
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals, print_function
import re

try:
    import icu
    thkey = icu.Collator.createInstance(icu.Locale('th_TH')).getSortKey
except ImportError:
    def thkey(word):
        cv = re.sub('[็-์]', '', word) # remove tone

## lmcut.py
from marisa_trie import Trie

# wordlist = ...
trie = Trie(wordlist)

def lmcut(text):
  for w in reversed(trie.prefixes(text)):
    if w==text:
      yield [w]
    else:

## multicut.py
import re
from collections import defaultdict
from marisa_trie import Trie

# Load the dictionary (one word per line). The `with` block closes the file
# deterministically instead of leaving the handle to the garbage collector.
with open('wordlist.txt') as wordlist_file:
    wordlist = [li.strip() for li in wordlist_file]
trie = Trie(wordlist)   # built once, outside the function

class LatticeString(str):
    ''' String subclass for storing several alternative segmentations (ways of cutting)
    '''

## LK82.py
# ตาม guru.sanook.com/1520
import re
t1 = str.maketrans("กขฃคฅฆงจฉชฌซศษสญยฎดฏตณนฐฑฒถทธบปผพภฝฟมรลฬฤฦวหฮอ",
                   "กกกกกกงจชชชซซซซยยดดตตนนททททททบปพพพฟฟมรรรรรวหหอ")
t2 = str.maketrans(
    "กขฃคฅฆงจฉชซฌฎฏฐฑฒดตถทธศษสญณนรลฬฤฦบปพฟภผฝมำยวไใหฮาๅึืเแโุูอ",
    "1111112333333333333333333444444445555555667777889AAABCDEEF")

def LK82(s):
    res = []

## container.xml
<?xml version='1.0' encoding='UTF-8'?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
  <rootfiles>
    <rootfile media-type="application/oebps-package+xml" full-path="content.opf"/>
  </rootfiles>
</container>

## xor.py
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import numpy as np

# The four possible 2-bit inputs ...
X = np.array([[0,0],[0,1],[1,0],[1,1]])
# ... and their targets: 1 only when exactly one of the two inputs is 1 (XOR).
y = np.array([[0],[1],[1],[0]])

# Layers are stacked one after another on a Sequential model.
model = Sequential()
# First layer: fully connected, 8 units, fed by the 2 input features.
model.add(Dense(8, input_dim=2))
	# This is a modified version of TRL's `SFTTrainer` example (https://github.com/huggingface/trl/blob/main/examples/scripts/sft_trainer.py),
	# adapted to run with DeepSpeed ZeRO-3 and Mistral-7B-V1.0. The settings below were run on 1 node of 8 x A100 (80GB) GPUs.
	#
	# Usage:
	# - Install the latest transformers & accelerate versions: `pip install -U transformers accelerate`
	# - Install deepspeed: `pip install deepspeed==0.9.5`
	# - Install TRL from main: pip install git+https://github.com/huggingface/trl.git
	# - Clone the repo: git clone https://github.com/huggingface/trl.git
	# - Copy this Gist into trl/examples/scripts
	# - Run from root of trl repo with: accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero3.yaml --gradient_accumulation_steps 8 examples/scripts/sft_trainer.py
	name: Minimal Android CI Workflow

	on:
	push:
	branches:
	- master
	tags:
	- 'v*'

	jobs:
	import datetime, pytz
	tz = pytz.timezone('Asia/Bangkok')

	def now():
	    """Return the current time in ``tz`` as a Thai-formatted date string.

	    The result has the form
	    ``"<day> <Thai month name> <year + 543> <HH:MM:SS>"``.
	    """
	    now1 = datetime.datetime.now(tz)
	    # The leading 'x' is a placeholder so that index 1 maps to January.
	    month_name = 'x มกราคม กุมภาพันธ์ มีนาคม เมษายน พฤษภาคม มิถุนายน กรกฎาคม สิงหาคม กันยายน ตุลาคม พฤศจิกายน ธันวาคม'.split()[now1.month]
	    thai_year = now1.year + 543  # Gregorian year -> Buddhist Era year
	    time_str = now1.strftime('%H:%M:%S')
	    return "%d %s %d %s"%(now1.day, month_name, thai_year, time_str) # 30 ตุลาคม 2560 20:45:30
	# -*- coding: utf-8 -*-
	from __future__ import absolute_import, unicode_literals, print_function
	import re

	try:
	import icu
	thkey = icu.Collator.createInstance(icu.Locale('th_TH')).getSortKey
	except ImportError:
	def thkey(word):
	cv = re.sub('[็-์]', '', word) # remove tone
	from marisa_trie import Trie

	# wordlist = ...
	trie = Trie(wordlist)

	def lmcut(text):
	for w in reversed(trie.prefixes(text)):
	if w==text:
	yield [w]
	else:
	import re
	from collections import defaultdict
	from marisa_trie import Trie

	wordlist = [li.strip() for li in open('wordlist.txt')]
	trie = Trie(wordlist) # สร้างครั้งเดียว ข้างนอก function

	class LatticeString(str):
	    ''' String subclass for storing several alternative segmentations (ways of cutting)
	    '''
	# ตาม guru.sanook.com/1520
	import re
	t1 = str.maketrans("กขฃคฅฆงจฉชฌซศษสญยฎดฏตณนฐฑฒถทธบปผพภฝฟมรลฬฤฦวหฮอ",
	"กกกกกกงจชชชซซซซยยดดตตนนททททททบปพพพฟฟมรรรรรวหหอ")
	t2 = str.maketrans(
	"กขฃคฅฆงจฉชซฌฎฏฐฑฒดตถทธศษสญณนรลฬฤฦบปพฟภผฝมำยวไใหฮาๅึืเแโุูอ",
	"1111112333333333333333333444444445555555667777889AAABCDEEF")

	def LK82(s):
	res = []
	<?xml version='1.0' encoding='UTF-8'?>
	<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
	<rootfiles>
	<rootfile media-type="application/oebps-package+xml" full-path="content.opf"/>
	</rootfiles>
	</container>
	from keras.models import Sequential
	from keras.layers.core import Dense, Dropout, Activation
	from keras.optimizers import SGD
	import numpy as np

	X = np.array([[0,0],[0,1],[1,0],[1,1]])
	y = np.array([[0],[1],[1],[0]])

	model = Sequential()
	model.add(Dense(8, input_dim=2))