Skip to content

Instantly share code, notes, and snippets.

View wannaphong's full-sized avatar

Wannaphong Phatthiyaphaibun wannaphong

View GitHub Profile
@HughP
HughP / UAX_29.py
Created March 30, 2015 06:10
PyICU
# We start by loading up PyICU.
import PyICU as icu
# Let's create a test text. Notice it contains some punctuation.
test = u"This is (\"a\") test!"
# We create a wordbreak iterator. All break iterators in ICU are really RuleBasedBreakIterators, and we need to tell it which locale to take the word break rules from. Most locales have the same rules for UAX#29 so we will use English.
wb = icu.BreakIterator.createWordInstance(icu.Locale.getEnglish())
# An iterator is just that. It contains state and then we iterate over it. The state in this case is the text we want to break. So we set that.
@fheisler
fheisler / q.py
Created March 31, 2015 23:02
Q-learning Tic-tac-toe
import random
class TicTacToe:
def __init__(self, playerX, playerO):
self.board = [' ']*9
self.playerX, self.playerO = playerX, playerO
self.playerX_turn = random.choice([True, False])
def play_game(self):
@stewartpark
stewartpark / xor.py
Created October 12, 2015 08:17
Simple XOR learning with keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import numpy as np
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0],[1],[1],[0]])
model = Sequential()
model.add(Dense(8, input_dim=2))
@avinassh
avinassh / container.xml
Created October 27, 2015 05:38 — forked from anqxyr/archived
Create EPUB files with Python
<?xml version='1.0' encoding='UTF-8'?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
<rootfiles>
<rootfile media-type="application/oebps-package+xml" full-path="content.opf"/>
</rootfiles>
</container>
@korakot
korakot / LK82.py
Last active April 13, 2020 04:16
Thai Soundex LK82, Udom83
# ตาม guru.sanook.com/1520
import re
t1 = str.maketrans("กขฃคฅฆงจฉชฌซศษสญยฎดฏตณนฐฑฒถทธบปผพภฝฟมรลฬฤฦวหฮอ",
"กกกกกกงจชชชซซซซยยดดตตนนททททททบปพพพฟฟมรรรรรวหหอ")
t2 = str.maketrans(
"กขฃคฅฆงจฉชซฌฎฏฐฑฒดตถทธศษสญณนรลฬฤฦบปพฟภผฝมำยวไใหฮาๅึืเแโุูอ",
"1111112333333333333333333444444445555555667777889AAABCDEEF")
def LK82(s):
res = []
@korakot
korakot / multicut.py
Last active February 1, 2018 09:39
Return all possible ways to cut(tokenize) Thai text.
import re
from collections import defaultdict
from marisa_trie import Trie
wordlist = [li.strip() for li in open('wordlist.txt')]
trie = Trie(wordlist) # สร้างครั้งเดียว ข้างนอก function
class LatticeString(str):
''' String subclass เพื่อเก็บวิธีตัดหลายๆ วิธี
'''
@korakot
korakot / lmcut.py
Last active January 18, 2020 06:43
Longest matching Thai word tokenization
from marisa_trie import Trie
# wordlist = ...
trie = Trie(wordlist)
def lmcut(text):
for w in reversed(trie.prefixes(text)):
if w==text:
yield [w]
else:
@korakot
korakot / collation.py
Last active October 26, 2017 13:04
A simplified pure-python thaisort
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals, print_function
import re
try:
import icu
thkey = icu.Collator.createInstance(icu.Locale('th_TH')).getSortKey
except ImportError:
def thkey(word):
cv = re.sub('[็-์]', '', word) # remove tone
@korakot
korakot / thai_datetime.py
Created October 30, 2017 13:45
Thai datetime in python
import datetime, pytz
tz = pytz.timezone('Asia/Bangkok')
def now():
now1 = datetime.datetime.now(tz)
month_name = 'x มกราคม กุมภาพันธ์ มีนาคม เมษายน พฤษภาคม มิถุนายน กรกฎาคม สิงหาคม กันยายน ตุลาคม พฤศจิกายน ธันวาคม'.split()[now1.month]
thai_year = now1.year + 543
time_str = now1.strftime('%H:%M:%S')
return "%d %s %d %s"%(now1.day, month_name, thai_year, time_str) # 30 ตุลาคม 2560 20:45:30
@korakot
korakot / colab_download.py
Created November 15, 2017 08:40
Google colab file upload/download
files.download('example.txt') # from colab to browser download